
feat: introduce strict eviction policy

With the strict eviction policy, the descheduler only evicts pods that
request one of the thresholded resources. For example, if a threshold is
set for an extended resource called `example.com/gpu`, only pods that
request that resource will be evicted.
Ricardo Maraschini
2025-04-08 19:33:23 +02:00
parent cca28f7bbe
commit 35a7178df6
7 changed files with 149 additions and 6 deletions
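
For illustration, a minimal sketch of how the new mode could be enabled when building the plugin arguments, assuming the package paths from this repository; the extended resource name and the 40 percent threshold are made-up values:

package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
	"sigs.k8s.io/descheduler/pkg/api"
	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization"
)

// buildArgs shows the new EvictionModes field in use: with
// OnlyThresholdingResources enabled, only pods that request
// example.com/gpu become eviction candidates.
func buildArgs() *nodeutilization.HighNodeUtilizationArgs {
	return &nodeutilization.HighNodeUtilizationArgs{
		Thresholds: api.ResourceThresholds{
			v1.ResourceName("example.com/gpu"): 40, // illustrative threshold
		},
		EvictionModes: []nodeutilization.EvictionMode{
			nodeutilization.EvictionModeOnlyThresholdingResources,
		},
	}
}

func main() {
	fmt.Printf("%+v\n", buildArgs())
}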

View File

@@ -19,6 +19,7 @@ package nodeutilization
import (
"context"
"fmt"
"slices"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
@@ -74,9 +75,26 @@ func NewHighNodeUtilization(
highThresholds[rname] = MaxResourcePercentage
}
// get the resource names for which we have a threshold. this is
// later used when determining if we are going to evict a pod.
resourceThresholds := getResourceNames(args.Thresholds)
// by default we evict pods from the underutilized nodes even if they
// don't define a request for a given threshold. this works most of the
// time and there is a use case for it. when using the restricted mode
// (OnlyThresholdingResources) we evaluate whether the pod has a request
// for any of the resources the user has provided as thresholds.
filters := []podutil.FilterFunc{handle.Evictor().Filter}
if slices.Contains(args.EvictionModes, EvictionModeOnlyThresholdingResources) {
filters = append(
filters,
withResourceRequestForAny(resourceThresholds...),
)
}
podFilter, err := podutil.
NewOptions().
-WithFilter(handle.Evictor().Filter).
+WithFilter(podutil.WrapFilterFuncs(filters...)).
BuildFilterFunc()
if err != nil {
return nil, fmt.Errorf("error initializing pod filter function: %v", err)
@@ -87,7 +105,7 @@ func NewHighNodeUtilization(
// all we consider the basic resources (cpu, memory, pods).
resourceNames := uniquifyResourceNames(
append(
-getResourceNames(args.Thresholds),
+resourceThresholds,
v1.ResourceCPU,
v1.ResourceMemory,
v1.ResourcePods,
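
For context, `podutil.WrapFilterFuncs` combines the filters above so that a pod must pass all of them. A minimal sketch of that AND-style composition (not the library's actual code; v1 is k8s.io/api/core/v1):

// andFilters mimics WrapFilterFuncs-style composition: the resulting
// filter accepts a pod only if every wrapped filter accepts it.
func andFilters(filters ...func(*v1.Pod) bool) func(*v1.Pod) bool {
	return func(p *v1.Pod) bool {
		for _, f := range filters {
			if !f(p) {
				return false // first rejection wins
			}
		}
		return true
	}
}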

View File

@@ -48,6 +48,7 @@ func TestHighNodeUtilization(t *testing.T) {
testCases := []struct {
name string
thresholds api.ResourceThresholds
evictionModes []EvictionMode
nodes []*v1.Node
pods []*v1.Pod
expectedPodsEvicted uint
@@ -433,6 +434,53 @@ func TestHighNodeUtilization(t *testing.T) {
},
expectedPodsEvicted: 0,
},
{
name: "with extended resource threshold and no extended resource pods",
thresholds: api.ResourceThresholds{
extendedResource: 40,
},
evictionModes: []EvictionMode{EvictionModeOnlyThresholdingResources},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 10)
}),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 10)
}),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 10)
}),
},
pods: []*v1.Pod{
// pods on node1 have the extended resource
// request set and they put the node in the
// over utilization range.
test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 3)
}),
test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 3)
}),
// pods on the other nodes must not be evicted
// because they do not have the extended
// resource defined in their requests.
test.BuildTestPod("p3", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
}),
test.BuildTestPod("p4", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
}),
test.BuildTestPod("p5", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
}),
test.BuildTestPod("p6", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
}),
},
expectedPodsEvicted: 0,
},
}
for _, testCase := range testCases {
@@ -474,10 +522,13 @@ func TestHighNodeUtilization(t *testing.T) {
})
}
-plugin, err := NewHighNodeUtilization(&HighNodeUtilizationArgs{
-Thresholds: testCase.thresholds,
-},
-handle)
+plugin, err := NewHighNodeUtilization(
+&HighNodeUtilizationArgs{
+Thresholds: testCase.thresholds,
+EvictionModes: testCase.evictionModes,
+},
+handle,
+)
if err != nil {
t.Fatalf("Unable to initialize the plugin: %v", err)
}

View File

@@ -32,6 +32,7 @@ import (
"k8s.io/utils/ptr"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
"sigs.k8s.io/descheduler/pkg/descheduler/pod"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
@@ -752,3 +753,19 @@ func assessAvailableResourceInNodes(
return available, nil
}
// withResourceRequestForAny returns a filter function that checks if a pod
// has a resource request specified for any of the given resource names.
func withResourceRequestForAny(names ...v1.ResourceName) pod.FilterFunc {
return func(pod *v1.Pod) bool {
all := append(pod.Spec.Containers, pod.Spec.InitContainers...)
for _, name := range names {
for _, container := range all {
if _, ok := container.Resources.Requests[name]; ok {
return true
}
}
}
return false
}
}
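
To illustrate, a hand-rolled usage of the filter with one pod that requests the extended resource and one that does not; the resource name is made up and `resource` is k8s.io/apimachinery/pkg/api/resource:

filter := withResourceRequestForAny("example.com/gpu")

// a pod whose only container requests one unit of the extended resource
gpuPod := &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{
	Resources: v1.ResourceRequirements{Requests: v1.ResourceList{
		"example.com/gpu": resource.MustParse("1"),
	}},
}}}}
// a pod with no resource requests at all
plainPod := &v1.Pod{Spec: v1.PodSpec{Containers: []v1.Container{{}}}}

fmt.Println(filter(gpuPod))   // true: requests a thresholded resource
fmt.Println(filter(plainPod)) // false: never evicted under the strict mode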

View File

@@ -18,6 +18,18 @@ import (
"sigs.k8s.io/descheduler/pkg/api"
)
// EvictionMode describes a mode of eviction. See the list below for the
// available modes.
type EvictionMode string
const (
// EvictionModeOnlyThresholdingResources makes the descheduler evict
// only pods that have a resource request defined for any of the
// user-provided thresholds. If the pod does not request the resource,
// it will not be evicted.
EvictionModeOnlyThresholdingResources EvictionMode = "OnlyThresholdingResources"
)
// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -48,6 +60,13 @@ type HighNodeUtilizationArgs struct {
Thresholds api.ResourceThresholds `json:"thresholds"`
NumberOfNodes int `json:"numberOfNodes,omitempty"`
// EvictionModes is a set of modes to be taken into account when the
// descheduler evicts pods. For example, the mode
// `OnlyThresholdingResources` can be used to make sure the descheduler
// only evicts pods that have resource requests for the defined
// thresholds.
EvictionModes []EvictionMode `json:"evictionModes,omitempty"`
// Naming this one differently since namespaces are still taken into
// account when computing the resources used by pods, but are then
// filtered out before eviction

View File

@@ -30,7 +30,25 @@ func ValidateHighNodeUtilizationArgs(obj runtime.Object) error {
if err != nil {
return err
}
// make sure we know about the eviction modes defined by the user.
return validateEvictionModes(args.EvictionModes)
}
// validateEvictionModes checks if the eviction modes are valid/known
// to the descheduler.
func validateEvictionModes(modes []EvictionMode) error {
// we are using this approach to make the code more extensible
// in the future.
validModes := map[EvictionMode]bool{
EvictionModeOnlyThresholdingResources: true,
}
for _, mode := range modes {
if validModes[mode] {
continue
}
return fmt.Errorf("invalid eviction mode %s", mode)
}
return nil
}
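
A quick sanity check of the validator; the `Bogus` mode string is made up:

// Known modes pass validation.
err := validateEvictionModes([]EvictionMode{EvictionModeOnlyThresholdingResources})
fmt.Println(err) // <nil>

// Unknown modes are rejected with a descriptive error.
err = validateEvictionModes([]EvictionMode{"Bogus"})
fmt.Println(err) // invalid eviction mode Bogus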

View File

@@ -37,6 +37,11 @@ func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) {
(*out)[key] = val
}
}
if in.EvictionModes != nil {
in, out := &in.EvictionModes, &out.EvictionModes
*out = make([]EvictionMode, len(*in))
copy(*out, *in)
}
if in.EvictableNamespaces != nil {
in, out := &in.EvictableNamespaces, &out.EvictableNamespaces
*out = new(api.Namespaces)