mirror of https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-26 05:14:13 +01:00

Merge pull request #1663 from ricardomaraschini/strict-pod-eviction

feat: introduce strict eviction policy
@@ -405,6 +405,12 @@ strategy evicts pods from `underutilized nodes` (those with usage below `thresho
so that they can be recreated in appropriately utilized nodes.
The strategy will abort if the number of `underutilized nodes` or `appropriately utilized nodes` is zero.

To control pod eviction from underutilized nodes, use the `evictionModes`
array. A lenient policy, which evicts pods regardless of their resource
requests, is the default. To enable a stricter policy that only evicts pods
with resource requests defined for the provided threshold resources, add the
option `OnlyThresholdingResources` to the `evictionModes` configuration.

**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage.
This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same
design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands

@@ -417,8 +423,15 @@ actual usage metrics. Implementing metrics-based descheduling is currently TODO
|---|---|
|`thresholds`|map(string:int)|
|`numberOfNodes`|int|
|`evictionModes`|list(string)|
|`evictableNamespaces`|(see [namespace filtering](#namespace-filtering))|

**Supported Eviction Modes:**

|Name|Description|
|---|---|
|`OnlyThresholdingResources`|Evict only pods that have resource requests defined for the provided threshold resources.|

**Example:**

```yaml
@@ -437,6 +450,8 @@ profiles:
          exclude:
          - "kube-system"
          - "namespace1"
        evictionModes:
        - "OnlyThresholdingResources"
    plugins:
      balance:
        enabled:

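# For orientation, a fuller sketch of where `evictionModes` sits in a
# complete policy file. This is illustrative only: it assumes the standard
# `DeschedulerPolicy` v1alpha2 profile layout used by the other examples in
# this README, and the profile name and threshold values are placeholders.
apiVersion: "descheduler/v1alpha2"
kind: "DeschedulerPolicy"
profiles:
  - name: ProfileName
    pluginConfig:
    - name: "HighNodeUtilization"
      args:
        thresholds:
          "cpu": 20
          "memory": 20
        evictableNamespaces:
          exclude:
          - "kube-system"
          - "namespace1"
        evictionModes:
        - "OnlyThresholdingResources"
    plugins:
      balance:
        enabled:
          - "HighNodeUtilization"
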
@@ -19,6 +19,7 @@ package nodeutilization
import (
	"context"
	"fmt"
	"slices"

	v1 "k8s.io/api/core/v1"
	"k8s.io/apimachinery/pkg/runtime"

@@ -74,9 +75,26 @@ func NewHighNodeUtilization(
		highThresholds[rname] = MaxResourcePercentage
	}

	// get the resource names for which we have a threshold. this is
	// later used when determining whether we are going to evict a pod.
	resourceThresholds := getResourceNames(args.Thresholds)

	// by default we evict pods from the underutilized nodes even if they
	// don't define a request for a given threshold. this works most of the
	// time and there is a use case for it. When using the strict mode we
	// evaluate whether the pod has a request for any of the resources the
	// user has provided as thresholds.
	filters := []podutil.FilterFunc{handle.Evictor().Filter}
	if slices.Contains(args.EvictionModes, EvictionModeOnlyThresholdingResources) {
		filters = append(
			filters,
			withResourceRequestForAny(resourceThresholds...),
		)
	}

	podFilter, err := podutil.
		NewOptions().
		WithFilter(handle.Evictor().Filter).
		WithFilter(podutil.WrapFilterFuncs(filters...)).
		BuildFilterFunc()
	if err != nil {
		return nil, fmt.Errorf("error initializing pod filter function: %v", err)

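The eviction filter here is assembled by composing several `podutil.FilterFunc` values, and a pod is considered for eviction only if every filter admits it. The snippet below is a self-contained sketch of that AND-style composition, not the descheduler's own `podutil` package: `filterFunc`, `wrapFilterFuncs`, `evictable` and `requestsCPU` are hypothetical stand-ins for `podutil.FilterFunc`, `podutil.WrapFilterFuncs`, `handle.Evictor().Filter` and `withResourceRequestForAny(v1.ResourceCPU)`.

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// filterFunc mirrors the shape of podutil.FilterFunc: it reports whether
// a pod passes one eviction precondition.
type filterFunc func(*v1.Pod) bool

// wrapFilterFuncs is a hypothetical stand-in for podutil.WrapFilterFuncs:
// the combined filter admits a pod only if every wrapped filter admits it.
func wrapFilterFuncs(filters ...filterFunc) filterFunc {
	return func(p *v1.Pod) bool {
		for _, f := range filters {
			if f != nil && !f(p) {
				return false
			}
		}
		return true
	}
}

func main() {
	// evictable stands in for the default evictor filter.
	evictable := func(p *v1.Pod) bool { return true }
	// requestsCPU stands in for withResourceRequestForAny(v1.ResourceCPU).
	requestsCPU := func(p *v1.Pod) bool {
		for _, c := range p.Spec.Containers {
			if _, ok := c.Resources.Requests[v1.ResourceCPU]; ok {
				return true
			}
		}
		return false
	}

	combined := wrapFilterFuncs(evictable, requestsCPU)
	fmt.Println(combined(&v1.Pod{})) // false: no container requests cpu
}
```

With the strict mode enabled, the extra request-based filter simply becomes one more entry in the wrapped list, so pods without requests for the thresholded resources are screened out before eviction.
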
@@ -87,7 +105,7 @@ func NewHighNodeUtilization(
	// all we consider the basic resources (cpu, memory, pods).
	resourceNames := uniquifyResourceNames(
		append(
			getResourceNames(args.Thresholds),
			resourceThresholds,
			v1.ResourceCPU,
			v1.ResourceMemory,
			v1.ResourcePods,

@@ -48,6 +48,7 @@ func TestHighNodeUtilization(t *testing.T) {
	testCases := []struct {
		name                string
		thresholds          api.ResourceThresholds
		evictionModes       []EvictionMode
		nodes               []*v1.Node
		pods                []*v1.Pod
		expectedPodsEvicted uint

@@ -433,6 +434,53 @@ func TestHighNodeUtilization(t *testing.T) {
			},
			expectedPodsEvicted: 0,
		},
		{
			name: "with extended resource threshold and no extended resource pods",
			thresholds: api.ResourceThresholds{
				extendedResource: 40,
			},
			evictionModes: []EvictionMode{EvictionModeOnlyThresholdingResources},
			nodes: []*v1.Node{
				test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
					test.SetNodeExtendedResource(node, extendedResource, 10)
				}),
				test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) {
					test.SetNodeExtendedResource(node, extendedResource, 10)
				}),
				test.BuildTestNode(n3NodeName, 4000, 3000, 10, func(node *v1.Node) {
					test.SetNodeExtendedResource(node, extendedResource, 10)
				}),
			},
			pods: []*v1.Pod{
				// pods on node1 have the extended resource
				// request set and they put the node in the
				// over utilization range.
				test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
					test.SetRSOwnerRef(pod)
					test.SetPodExtendedResourceRequest(pod, extendedResource, 3)
				}),
				test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
					test.SetRSOwnerRef(pod)
					test.SetPodExtendedResourceRequest(pod, extendedResource, 3)
				}),
				// pods in the other nodes must not be evicted
				// because they do not have the extended
				// resource defined in their requests.
				test.BuildTestPod("p3", 500, 0, n2NodeName, func(pod *v1.Pod) {
					test.SetRSOwnerRef(pod)
				}),
				test.BuildTestPod("p4", 500, 0, n2NodeName, func(pod *v1.Pod) {
					test.SetRSOwnerRef(pod)
				}),
				test.BuildTestPod("p5", 500, 0, n2NodeName, func(pod *v1.Pod) {
					test.SetRSOwnerRef(pod)
				}),
				test.BuildTestPod("p6", 500, 0, n2NodeName, func(pod *v1.Pod) {
					test.SetRSOwnerRef(pod)
				}),
			},
			expectedPodsEvicted: 0,
		},
	}

	for _, testCase := range testCases {

@@ -474,10 +522,13 @@ func TestHighNodeUtilization(t *testing.T) {
				})
			}

			plugin, err := NewHighNodeUtilization(&HighNodeUtilizationArgs{
				Thresholds: testCase.thresholds,
			},
				handle)
			plugin, err := NewHighNodeUtilization(
				&HighNodeUtilizationArgs{
					Thresholds:    testCase.thresholds,
					EvictionModes: testCase.evictionModes,
				},
				handle,
			)
			if err != nil {
				t.Fatalf("Unable to initialize the plugin: %v", err)
			}

@@ -32,6 +32,7 @@ import (
	"k8s.io/utils/ptr"
	"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
	"sigs.k8s.io/descheduler/pkg/descheduler/pod"
	podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
	"sigs.k8s.io/descheduler/pkg/framework/plugins/nodeutilization/normalizer"
	frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"

@@ -752,3 +753,19 @@ func assessAvailableResourceInNodes(

	return available, nil
}

// withResourceRequestForAny returns a filter function that checks whether a
// pod has a resource request specified for any of the given resource names.
func withResourceRequestForAny(names ...v1.ResourceName) pod.FilterFunc {
	return func(pod *v1.Pod) bool {
		all := append(pod.Spec.Containers, pod.Spec.InitContainers...)
		for _, name := range names {
			for _, container := range all {
				if _, ok := container.Resources.Requests[name]; ok {
					return true
				}
			}
		}
		return false
	}
}

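A short, hypothetical usage sketch of the filter above, written as a test in the same package (it assumes the imports `v1 "k8s.io/api/core/v1"` and `"k8s.io/apimachinery/pkg/api/resource"`): because both regular and init containers are inspected, a pod that declares the threshold resource only in an init container still matches. The test name and the `example.com/gpu` resource name are illustrative.

```go
// Hypothetical test in package nodeutilization.
func TestWithResourceRequestForAny_InitContainer(t *testing.T) {
	filter := withResourceRequestForAny(v1.ResourceName("example.com/gpu"))

	// The threshold resource is requested only by an init container;
	// the filter still matches because both container lists are checked.
	pod := &v1.Pod{
		Spec: v1.PodSpec{
			InitContainers: []v1.Container{{
				Resources: v1.ResourceRequirements{
					Requests: v1.ResourceList{
						"example.com/gpu": resource.MustParse("1"),
					},
				},
			}},
		},
	}

	if !filter(pod) {
		t.Fatalf("expected pod requesting example.com/gpu in an init container to match")
	}
}
```
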
@@ -18,6 +18,18 @@ import (
	"sigs.k8s.io/descheduler/pkg/api"
)

// EvictionMode describes a mode of eviction. See the list below for the
// available modes.
type EvictionMode string

const (
	// EvictionModeOnlyThresholdingResources makes the descheduler evict
	// only pods that have a resource request defined for any of the
	// user-provided thresholds. If the pod does not request the resource,
	// it will not be evicted.
	EvictionModeOnlyThresholdingResources EvictionMode = "OnlyThresholdingResources"
)

// +k8s:deepcopy-gen=true
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object

@@ -48,6 +60,13 @@ type HighNodeUtilizationArgs struct {
	Thresholds    api.ResourceThresholds `json:"thresholds"`
	NumberOfNodes int                    `json:"numberOfNodes,omitempty"`

	// EvictionModes is a set of modes to be taken into account when the
	// descheduler evicts pods. For example the mode
	// `OnlyThresholdingResources` can be used to make sure the descheduler
	// only evicts pods that have resource requests for the defined
	// thresholds.
	EvictionModes []EvictionMode `json:"evictionModes,omitempty"`

	// Naming this one differently since namespaces are still taken into
	// account when computing the resources used by pods, but are then
	// filtered out before eviction

@@ -30,7 +30,25 @@ func ValidateHighNodeUtilizationArgs(obj runtime.Object) error {
	if err != nil {
		return err
	}
	// make sure we know about the eviction modes defined by the user.
	return validateEvictionModes(args.EvictionModes)
}

// validateEvictionModes checks if the eviction modes are valid/known
// to the descheduler.
func validateEvictionModes(modes []EvictionMode) error {
	// we are using this approach to make the code more extensible
	// in the future.
	validModes := map[EvictionMode]bool{
		EvictionModeOnlyThresholdingResources: true,
	}

	for _, mode := range modes {
		if validModes[mode] {
			continue
		}
		return fmt.Errorf("invalid eviction mode %s", mode)
	}
	return nil
}

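A hypothetical illustration of what this validation accepts and rejects; the error text follows the `fmt.Errorf` call above.

```go
// Known mode: accepted.
_ = validateEvictionModes([]EvictionMode{EvictionModeOnlyThresholdingResources}) // nil

// Unknown mode (a typo): rejected with `invalid eviction mode OnlyThresholdingResource`.
_ = validateEvictionModes([]EvictionMode{"OnlyThresholdingResource"}) // non-nil error
```
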
@@ -37,6 +37,11 @@ func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) {
			(*out)[key] = val
		}
	}
	if in.EvictionModes != nil {
		in, out := &in.EvictionModes, &out.EvictionModes
		*out = make([]EvictionMode, len(*in))
		copy(*out, *in)
	}
	if in.EvictableNamespaces != nil {
		in, out := &in.EvictableNamespaces, &out.EvictableNamespaces
		*out = new(api.Namespaces)