Mirror of https://github.com/kubernetes-sigs/descheduler.git
feat: introduce strict eviction policy
With the strict eviction policy the descheduler only evicts pods that request one of the thresholded resources. For example, if a threshold is set for an extended resource called `example.com/gpu`, only pods that request that resource will be evicted.
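To illustrate the request-matching check this mode introduces, here is a minimal, self-contained sketch. The helper hasRequestForAny mirrors the withResourceRequestForAny filter added in the diff below; the pod, the resource name, and the quantity are made up for the example.

package main

import (
    "fmt"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
)

// hasRequestForAny mirrors withResourceRequestForAny from the diff below:
// it reports whether any container (or init container) of the pod requests
// at least one of the given resource names.
func hasRequestForAny(pod *v1.Pod, names ...v1.ResourceName) bool {
    all := append(pod.Spec.Containers, pod.Spec.InitContainers...)
    for _, name := range names {
        for _, container := range all {
            if _, ok := container.Resources.Requests[name]; ok {
                return true
            }
        }
    }
    return false
}

func main() {
    gpu := v1.ResourceName("example.com/gpu")

    // Illustrative pod: one container requesting the extended resource.
    pod := &v1.Pod{
        Spec: v1.PodSpec{
            Containers: []v1.Container{{
                Name: "worker",
                Resources: v1.ResourceRequirements{
                    Requests: v1.ResourceList{gpu: resource.MustParse("1")},
                },
            }},
        },
    }

    // With EvictionModeOnlyThresholdingResources and a threshold on
    // example.com/gpu, only pods like this one remain eviction candidates.
    fmt.Println(hasRequestForAny(pod, gpu)) // true
}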
@@ -19,6 +19,7 @@ package nodeutilization
import (
    "context"
    "fmt"
    "slices"

    v1 "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/runtime"
@@ -74,9 +75,26 @@ func NewHighNodeUtilization(
        highThresholds[rname] = MaxResourcePercentage
    }

    // get the resource names for which we have a threshold. this is
    // later used when determining if we are going to evict a pod.
    resourceThresholds := getResourceNames(args.Thresholds)

    // by default we evict pods from the underutilized nodes even if they
    // don't define a request for a given threshold. this works most of the
    // time and there is a use case for it. when using the restricted mode
    // we evaluate whether the pod has a request for any of the resources
    // the user has provided as thresholds.
    filters := []podutil.FilterFunc{handle.Evictor().Filter}
    if slices.Contains(args.EvictionModes, EvictionModeOnlyThresholdingResources) {
        filters = append(
            filters,
            withResourceRequestForAny(resourceThresholds...),
        )
    }

    podFilter, err := podutil.
        NewOptions().
        WithFilter(handle.Evictor().Filter).
        WithFilter(podutil.WrapFilterFuncs(filters...)).
        BuildFilterFunc()
    if err != nil {
        return nil, fmt.Errorf("error initializing pod filter function: %v", err)
@@ -87,7 +105,7 @@ func NewHighNodeUtilization(
    // all we consider the basic resources (cpu, memory, pods).
    resourceNames := uniquifyResourceNames(
        append(
            getResourceNames(args.Thresholds),
            resourceThresholds,
            v1.ResourceCPU,
            v1.ResourceMemory,
            v1.ResourcePods,

@@ -48,6 +48,7 @@ func TestHighNodeUtilization(t *testing.T) {
    testCases := []struct {
        name                string
        thresholds          api.ResourceThresholds
        evictionModes       []EvictionMode
        nodes               []*v1.Node
        pods                []*v1.Pod
        expectedPodsEvicted uint
@@ -433,6 +434,53 @@ func TestHighNodeUtilization(t *testing.T) {
            },
            expectedPodsEvicted: 0,
        },
        {
            name: "with extended resource threshold and no extended resource pods",
            thresholds: api.ResourceThresholds{
                extendedResource: 40,
            },
            evictionModes: []EvictionMode{EvictionModeOnlyThresholdingResources},
            nodes: []*v1.Node{
                test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
                    test.SetNodeExtendedResource(node, extendedResource, 10)
                }),
                test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) {
                    test.SetNodeExtendedResource(node, extendedResource, 10)
                }),
                test.BuildTestNode(n3NodeName, 4000, 3000, 10, func(node *v1.Node) {
                    test.SetNodeExtendedResource(node, extendedResource, 10)
                }),
            },
            pods: []*v1.Pod{
                // pods on node1 have the extended resource
                // request set and they put the node in the
                // over utilization range.
                test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
                    test.SetRSOwnerRef(pod)
                    test.SetPodExtendedResourceRequest(pod, extendedResource, 3)
                }),
                test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
                    test.SetRSOwnerRef(pod)
                    test.SetPodExtendedResourceRequest(pod, extendedResource, 3)
                }),
                // pods on the other nodes must not be evicted
                // because they do not have the extended
                // resource defined in their requests.
                test.BuildTestPod("p3", 500, 0, n2NodeName, func(pod *v1.Pod) {
                    test.SetRSOwnerRef(pod)
                }),
                test.BuildTestPod("p4", 500, 0, n2NodeName, func(pod *v1.Pod) {
                    test.SetRSOwnerRef(pod)
                }),
                test.BuildTestPod("p5", 500, 0, n2NodeName, func(pod *v1.Pod) {
                    test.SetRSOwnerRef(pod)
                }),
                test.BuildTestPod("p6", 500, 0, n2NodeName, func(pod *v1.Pod) {
                    test.SetRSOwnerRef(pod)
                }),
            },
            expectedPodsEvicted: 0,
        },
    }

    for _, testCase := range testCases {
@@ -474,10 +522,13 @@ func TestHighNodeUtilization(t *testing.T) {
                })
            }

            plugin, err := NewHighNodeUtilization(&HighNodeUtilizationArgs{
                Thresholds: testCase.thresholds,
            },
                handle)
            plugin, err := NewHighNodeUtilization(
                &HighNodeUtilizationArgs{
                    Thresholds:    testCase.thresholds,
                    EvictionModes: testCase.evictionModes,
                },
                handle,
            )
            if err != nil {
                t.Fatalf("Unable to initialize the plugin: %v", err)
            }

@@ -32,6 +32,7 @@ import (
    "k8s.io/utils/ptr"
    "sigs.k8s.io/descheduler/pkg/descheduler/evictions"
    nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
    "sigs.k8s.io/descheduler/pkg/descheduler/pod"
    podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
    "sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
    frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
@@ -752,3 +753,19 @@ func assessAvailableResourceInNodes(

    return available, nil
}

// withResourceRequestForAny returns a filter function that checks if a pod
// has a resource request specified for any of the given resource names.
func withResourceRequestForAny(names ...v1.ResourceName) pod.FilterFunc {
    return func(pod *v1.Pod) bool {
        all := append(pod.Spec.Containers, pod.Spec.InitContainers...)
        for _, name := range names {
            for _, container := range all {
                if _, ok := container.Resources.Requests[name]; ok {
                    return true
                }
            }
        }
        return false
    }
}

@@ -18,6 +18,18 @@ import (
    "sigs.k8s.io/descheduler/pkg/api"
)

// EvictionMode describes a mode of eviction. See the list below for the
// available modes.
type EvictionMode string

const (
    // EvictionModeOnlyThresholdingResources makes the descheduler evict
    // only pods that have a resource request defined for any of the user
    // provided thresholds. If the pod does not request the resource, it
    // will not be evicted.
    EvictionModeOnlyThresholdingResources EvictionMode = "OnlyThresholdingResources"
)

// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

@@ -48,6 +60,13 @@ type HighNodeUtilizationArgs struct {
    Thresholds    api.ResourceThresholds `json:"thresholds"`
    NumberOfNodes int                    `json:"numberOfNodes,omitempty"`

    // EvictionModes is a set of modes to be taken into account when the
    // descheduler evicts pods. For example, the mode
    // `OnlyThresholdingResources` can be used to make sure the descheduler
    // only evicts pods that have resource requests for the defined
    // thresholds.
    EvictionModes []EvictionMode `json:"evictionModes,omitempty"`

    // Naming this one differently since namespaces are still
    // considered while considering resources used by pods
    // but then filtered out before eviction

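For context, a hedged sketch of how these args could be populated in Go with the new mode enabled. The package alias, the example.com/gpu name, and the value 40 are assumptions for illustration (echoing the commit message and the tests); real deployments normally set this through the DeschedulerPolicy configuration.

package main

import (
    "fmt"

    "sigs.k8s.io/descheduler/pkg/api"
    nodeutilization "sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization"
)

func main() {
    // Sketch only: a threshold on a single extended resource plus the new mode.
    args := &nodeutilization.HighNodeUtilizationArgs{
        Thresholds: api.ResourceThresholds{
            "example.com/gpu": 40, // percentage threshold for the extended resource
        },
        EvictionModes: []nodeutilization.EvictionMode{
            nodeutilization.EvictionModeOnlyThresholdingResources,
        },
    }
    fmt.Printf("%+v\n", args)
}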
@@ -30,7 +30,25 @@ func ValidateHighNodeUtilizationArgs(obj runtime.Object) error {
    if err != nil {
        return err
    }
    // make sure we know about the eviction modes defined by the user.
    return validateEvictionModes(args.EvictionModes)
}

// validateEvictionModes checks if the eviction modes are valid/known
// to the descheduler.
func validateEvictionModes(modes []EvictionMode) error {
    // we are using this approach to make the code more extensible
    // in the future.
    validModes := map[EvictionMode]bool{
        EvictionModeOnlyThresholdingResources: true,
    }

    for _, mode := range modes {
        if validModes[mode] {
            continue
        }
        return fmt.Errorf("invalid eviction mode %s", mode)
    }
    return nil
}

@@ -37,6 +37,11 @@ func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) {
            (*out)[key] = val
        }
    }
    if in.EvictionModes != nil {
        in, out := &in.EvictionModes, &out.EvictionModes
        *out = make([]EvictionMode, len(*in))
        copy(*out, *in)
    }
    if in.EvictableNamespaces != nil {
        in, out := &in.EvictableNamespaces, &out.EvictableNamespaces
        *out = new(api.Namespaces)