mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-26 21:31:18 +01:00
Merge pull request #175 from swatisehgal/dev/strategyTaintTol
Strategy to consider taints and tolerations in Descheduler
This commit is contained in:
13
README.md
13
README.md
@@ -158,7 +158,7 @@ $ kubectl create -f descheduler-job.yaml
|
||||
## Policy and Strategies
|
||||
|
||||
Descheduler's policy is configurable and includes strategies to be enabled or disabled.
|
||||
Four strategies, `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, `RemovePodsViolatingNodeAffinity` are currently implemented.
|
||||
Five strategies, `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, `RemovePodsViolatingNodeAffinity`, and `RemovePodsViolatingNodeTaints`, are currently implemented.
|
||||
As part of the policy, the parameters associated with the strategies can be configured too.
|
||||
By default, all strategies are enabled.
|
||||
|
||||
@@ -248,7 +248,17 @@ strategies:
|
||||
nodeAffinityType:
|
||||
- "requiredDuringSchedulingIgnoredDuringExecution"
|
||||
```
|
||||
### RemovePodsViolatingNodeTaints
|
||||
|
||||
This strategy makes sure that pods violating NoSchedule taints on nodes are removed. For example, suppose a pod "podA" with a toleration for the taint ``key=value:NoSchedule`` is scheduled and running on the tainted node. If the node's taint is subsequently updated/removed, the taint is no longer satisfied by the pod's tolerations and the pod will be evicted. The policy file should look like:
|
||||
|
||||
````
|
||||
apiVersion: "descheduler/v1alpha1"
|
||||
kind: "DeschedulerPolicy"
|
||||
strategies:
|
||||
"RemovePodsViolatingNodeTaints":
|
||||
enabled: true
|
||||
````
|
||||
## Pod Evictions
|
||||
|
||||
When the descheduler decides to evict pods from a node, it employs the following general mechanism:
|
||||
@@ -271,7 +281,6 @@ disruption budget (PDB). The pods are evicted by using eviction subresource to h
|
||||
|
||||
This roadmap is not in any particular order.
|
||||
|
||||
* Strategy to consider taints and tolerations
|
||||
* Consideration of pod affinity
|
||||
* Strategy to consider pod life time
|
||||
* Strategy to consider number of pending pods
|
||||
|
||||
@@ -65,6 +65,6 @@ func Run(rs *options.DeschedulerServer) error {
|
||||
strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], evictionPolicyGroupVersion, nodes, nodePodCount)
|
||||
strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], evictionPolicyGroupVersion, nodes, nodePodCount)
|
||||
strategies.RemovePodsViolatingNodeAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeAffinity"], evictionPolicyGroupVersion, nodes, nodePodCount)
|
||||
|
||||
strategies.RemovePodsViolatingNodeTaints(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeTaints"], evictionPolicyGroupVersion, nodes, nodePodCount)
|
||||
return nil
|
||||
}
|
||||
|
||||
138
pkg/descheduler/strategies/node_taint.go
Normal file
138
pkg/descheduler/strategies/node_taint.go
Normal file
@@ -0,0 +1,138 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
const (
|
||||
TolerationOpExists v1.TolerationOperator = "Exists"
|
||||
TolerationOpEqual v1.TolerationOperator = "Equal"
|
||||
)
|
||||
|
||||
// RemovePodsViolatingNodeTaints with elimination strategy
|
||||
func RemovePodsViolatingNodeTaints(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodePodCount nodePodEvictedCount) {
|
||||
if !strategy.Enabled {
|
||||
return
|
||||
}
|
||||
deletePodsViolatingNodeTaints(ds.Client, policyGroupVersion, nodes, ds.DryRun, nodePodCount, ds.MaxNoOfPodsToEvictPerNode, ds.EvictLocalStoragePods)
|
||||
}
|
||||
|
||||
// deletePodsViolatingNodeTaints evicts pods on the node which violate NoSchedule Taints on nodes
|
||||
func deletePodsViolatingNodeTaints(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodePodCount nodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
|
||||
podsEvicted := 0
|
||||
for _, node := range nodes {
|
||||
klog.V(1).Infof("Processing node: %#v\n", node.Name)
|
||||
pods, err := podutil.ListEvictablePodsOnNode(client, node, evictLocalStoragePods)
|
||||
if err != nil {
|
||||
//no pods evicted as error encountered retrieving evictable Pods
|
||||
return 0
|
||||
}
|
||||
totalPods := len(pods)
|
||||
for i := 0; i < totalPods; i++ {
|
||||
if maxPodsToEvict > 0 && nodePodCount[node]+1 > maxPodsToEvict {
|
||||
break
|
||||
}
|
||||
if !checkPodsSatisfyTolerations(pods[i], node) {
|
||||
success, err := evictions.EvictPod(client, pods[i], policyGroupVersion, dryRun)
|
||||
if !success {
|
||||
klog.Errorf("Error when evicting pod: %#v (%#v)\n", pods[i].Name, err)
|
||||
} else {
|
||||
nodePodCount[node]++
|
||||
klog.V(1).Infof("Evicted pod: %#v (%#v)", pods[i].Name, err)
|
||||
}
|
||||
}
|
||||
}
|
||||
podsEvicted += nodePodCount[node]
|
||||
}
|
||||
return podsEvicted
|
||||
}
|
||||
|
||||
// checkPodsSatisfyTolerations checks if the node's taints (NoSchedule) are still satisfied by pods' tolerations.
|
||||
func checkPodsSatisfyTolerations(pod *v1.Pod, node *v1.Node) bool {
|
||||
tolerations := pod.Spec.Tolerations
|
||||
taints := node.Spec.Taints
|
||||
if len(taints) == 0 {
|
||||
return true
|
||||
}
|
||||
noScheduleTaints := getNoScheduleTaints(taints)
|
||||
if !allTaintsTolerated(noScheduleTaints, tolerations) {
|
||||
klog.V(2).Infof("Not all taints are tolerated after update for Pod %v on node %v", pod.Name, node.Name)
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// getNoScheduleTaints return a slice of NoSchedule taints from the a slice of taints that it receives.
|
||||
func getNoScheduleTaints(taints []v1.Taint) []v1.Taint {
|
||||
result := []v1.Taint{}
|
||||
for i := range taints {
|
||||
if taints[i].Effect == v1.TaintEffectNoSchedule {
|
||||
result = append(result, taints[i])
|
||||
}
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
//toleratesTaint returns true if a toleration tolerates a taint, or false otherwise
|
||||
func toleratesTaint(toleration *v1.Toleration, taint *v1.Taint) bool {
|
||||
|
||||
if (len(toleration.Key) > 0 && toleration.Key != taint.Key) ||
|
||||
(len(toleration.Effect) > 0 && toleration.Effect != taint.Effect) {
|
||||
return false
|
||||
}
|
||||
switch toleration.Operator {
|
||||
// empty operator means Equal
|
||||
case "", TolerationOpEqual:
|
||||
return toleration.Value == taint.Value
|
||||
case TolerationOpExists:
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// allTaintsTolerated returns true if all are tolerated, or false otherwise.
|
||||
func allTaintsTolerated(taints []v1.Taint, tolerations []v1.Toleration) bool {
|
||||
if len(taints) == 0 {
|
||||
return true
|
||||
}
|
||||
if len(tolerations) == 0 && len(taints) > 0 {
|
||||
return false
|
||||
}
|
||||
for i := range taints {
|
||||
tolerated := false
|
||||
for j := range tolerations {
|
||||
if toleratesTaint(&tolerations[j], &taints[i]) {
|
||||
tolerated = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !tolerated {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
295
pkg/descheduler/strategies/node_taint_test.go
Normal file
295
pkg/descheduler/strategies/node_taint_test.go
Normal file
@@ -0,0 +1,295 @@
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func createNoScheduleTaint(key, value string, index int) v1.Taint {
|
||||
return v1.Taint{
|
||||
Key: "testTaint" + fmt.Sprintf("%v", index),
|
||||
Value: "test" + fmt.Sprintf("%v", index),
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
}
|
||||
}
|
||||
|
||||
func addTaintsToNode(node *v1.Node, key, value string, indices []int) *v1.Node {
|
||||
taints := []v1.Taint{}
|
||||
for _, index := range indices {
|
||||
taints = append(taints, createNoScheduleTaint(key, value, index))
|
||||
}
|
||||
node.Spec.Taints = taints
|
||||
return node
|
||||
}
|
||||
|
||||
func addTolerationToPod(pod *v1.Pod, key, value string, index int) *v1.Pod {
|
||||
if pod.Annotations == nil {
|
||||
pod.Annotations = map[string]string{}
|
||||
}
|
||||
|
||||
pod.Spec.Tolerations = []v1.Toleration{{Key: key + fmt.Sprintf("%v", index), Value: value + fmt.Sprintf("%v", index), Effect: v1.TaintEffectNoSchedule}}
|
||||
|
||||
return pod
|
||||
}
|
||||
|
||||
func TestDeletePodsViolatingNodeTaints(t *testing.T) {
|
||||
|
||||
node1 := test.BuildTestNode("n1", 2000, 3000, 10)
|
||||
node1 = addTaintsToNode(node1, "testTaint", "test", []int{1})
|
||||
node2 := test.BuildTestNode("n2", 2000, 3000, 10)
|
||||
node1 = addTaintsToNode(node2, "testingTaint", "testing", []int{1})
|
||||
|
||||
p1 := test.BuildTestPod("p1", 100, 0, node1.Name)
|
||||
p2 := test.BuildTestPod("p2", 100, 0, node1.Name)
|
||||
p3 := test.BuildTestPod("p3", 100, 0, node1.Name)
|
||||
p4 := test.BuildTestPod("p4", 100, 0, node1.Name)
|
||||
p5 := test.BuildTestPod("p5", 100, 0, node1.Name)
|
||||
p6 := test.BuildTestPod("p6", 100, 0, node1.Name)
|
||||
|
||||
p1.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p2.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p3.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p4.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p5.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p7 := test.BuildTestPod("p7", 100, 0, node2.Name)
|
||||
p8 := test.BuildTestPod("p8", 100, 0, node2.Name)
|
||||
p9 := test.BuildTestPod("p9", 100, 0, node2.Name)
|
||||
p10 := test.BuildTestPod("p10", 100, 0, node2.Name)
|
||||
p11 := test.BuildTestPod("p11", 100, 0, node2.Name)
|
||||
p11.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
|
||||
// The following 4 pods won't get evicted.
|
||||
// A Critical Pod.
|
||||
p7.Namespace = "kube-system"
|
||||
p7.Annotations = test.GetCriticalPodAnnotation()
|
||||
|
||||
// A daemonset.
|
||||
p8.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
// A pod with local storage.
|
||||
p9.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p9.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
p10.Annotations = test.GetMirrorPodAnnotation()
|
||||
|
||||
p1 = addTolerationToPod(p1, "testTaint", "test", 1)
|
||||
p3 = addTolerationToPod(p3, "testTaint", "test", 1)
|
||||
p4 = addTolerationToPod(p4, "testTaintX", "testX", 1)
|
||||
|
||||
tests := []struct {
|
||||
description string
|
||||
nodes []*v1.Node
|
||||
pods []v1.Pod
|
||||
evictLocalStoragePods bool
|
||||
npe nodePodEvictedCount
|
||||
maxPodsToEvict int
|
||||
expectedEvictedPodCount int
|
||||
}{
|
||||
|
||||
{
|
||||
description: "Pods not tolerating node taint should be evicted",
|
||||
pods: []v1.Pod{*p1, *p2, *p3},
|
||||
nodes: []*v1.Node{node1},
|
||||
evictLocalStoragePods: false,
|
||||
npe: nodePodEvictedCount{node1: 0},
|
||||
maxPodsToEvict: 0,
|
||||
expectedEvictedPodCount: 1, //p2 gets evicted
|
||||
},
|
||||
{
|
||||
description: "Pods with tolerations but not tolerating node taint should be evicted",
|
||||
pods: []v1.Pod{*p1, *p3, *p4},
|
||||
nodes: []*v1.Node{node1},
|
||||
evictLocalStoragePods: false,
|
||||
npe: nodePodEvictedCount{node1: 0},
|
||||
maxPodsToEvict: 0,
|
||||
expectedEvictedPodCount: 1, //p4 gets evicted
|
||||
},
|
||||
{
|
||||
description: "Only <maxPodsToEvict> number of Pods not tolerating node taint should be evicted",
|
||||
pods: []v1.Pod{*p1, *p5, *p6},
|
||||
nodes: []*v1.Node{node1},
|
||||
evictLocalStoragePods: false,
|
||||
npe: nodePodEvictedCount{node1: 0},
|
||||
maxPodsToEvict: 1,
|
||||
expectedEvictedPodCount: 1, //p5 or p6 gets evicted
|
||||
},
|
||||
{
|
||||
description: "Critical pods not tolerating node taint should not be evicted",
|
||||
pods: []v1.Pod{*p7, *p8, *p9, *p10},
|
||||
nodes: []*v1.Node{node2},
|
||||
evictLocalStoragePods: false,
|
||||
npe: nodePodEvictedCount{node2: 0},
|
||||
maxPodsToEvict: 0,
|
||||
expectedEvictedPodCount: 0,
|
||||
},
|
||||
{
|
||||
description: "Critical pods except storage pods not tolerating node taint should not be evicted",
|
||||
pods: []v1.Pod{*p7, *p8, *p9, *p10},
|
||||
nodes: []*v1.Node{node2},
|
||||
evictLocalStoragePods: true,
|
||||
npe: nodePodEvictedCount{node2: 0},
|
||||
maxPodsToEvict: 0,
|
||||
expectedEvictedPodCount: 1,
|
||||
},
|
||||
{
|
||||
description: "Critical and non critical pods, only non critical pods not tolerating node taint should be evicted",
|
||||
pods: []v1.Pod{*p7, *p8, *p10, *p11},
|
||||
nodes: []*v1.Node{node2},
|
||||
evictLocalStoragePods: false,
|
||||
npe: nodePodEvictedCount{node2: 0},
|
||||
maxPodsToEvict: 0,
|
||||
expectedEvictedPodCount: 1,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
|
||||
// create fake client
|
||||
fakeClient := &fake.Clientset{}
|
||||
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
return true, &v1.PodList{Items: tc.pods}, nil
|
||||
})
|
||||
|
||||
actualEvictedPodCount := deletePodsViolatingNodeTaints(fakeClient, "v1", tc.nodes, false, tc.npe, tc.maxPodsToEvict, tc.evictLocalStoragePods)
|
||||
if actualEvictedPodCount != tc.expectedEvictedPodCount {
|
||||
t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestToleratesTaint(t *testing.T) {
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
toleration v1.Toleration
|
||||
taint v1.Taint
|
||||
expectTolerated bool
|
||||
}{
|
||||
{
|
||||
description: "toleration and taint have the same key and effect, and operator is Exists, and taint has no value, expect tolerated",
|
||||
toleration: v1.Toleration{
|
||||
Key: "foo",
|
||||
Operator: TolerationOpExists,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
taint: v1.Taint{
|
||||
Key: "foo",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
expectTolerated: true,
|
||||
},
|
||||
{
|
||||
description: "toleration and taint have the same key and effect, and operator is Exists, and taint has some value, expect tolerated",
|
||||
toleration: v1.Toleration{
|
||||
Key: "foo",
|
||||
Operator: TolerationOpExists,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
taint: v1.Taint{
|
||||
Key: "foo",
|
||||
Value: "bar",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
expectTolerated: true,
|
||||
},
|
||||
{
|
||||
description: "toleration and taint have the same effect, toleration has empty key and operator is Exists, means match all taints, expect tolerated",
|
||||
toleration: v1.Toleration{
|
||||
Key: "",
|
||||
Operator: TolerationOpExists,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
taint: v1.Taint{
|
||||
Key: "foo",
|
||||
Value: "bar",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
expectTolerated: true,
|
||||
},
|
||||
{
|
||||
description: "toleration and taint have the same key, effect and value, and operator is Equal, expect tolerated",
|
||||
toleration: v1.Toleration{
|
||||
Key: "foo",
|
||||
Operator: TolerationOpEqual,
|
||||
Value: "bar",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
taint: v1.Taint{
|
||||
Key: "foo",
|
||||
Value: "bar",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
expectTolerated: true,
|
||||
},
|
||||
{
|
||||
description: "toleration and taint have the same key and effect, but different values, and operator is Equal, expect not tolerated",
|
||||
toleration: v1.Toleration{
|
||||
Key: "foo",
|
||||
Operator: TolerationOpEqual,
|
||||
Value: "value1",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
taint: v1.Taint{
|
||||
Key: "foo",
|
||||
Value: "value2",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
expectTolerated: false,
|
||||
},
|
||||
{
|
||||
description: "toleration and taint have the same key and value, but different effects, and operator is Equal, expect not tolerated",
|
||||
toleration: v1.Toleration{
|
||||
Key: "foo",
|
||||
Operator: TolerationOpEqual,
|
||||
Value: "bar",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
taint: v1.Taint{
|
||||
Key: "foo",
|
||||
Value: "bar",
|
||||
Effect: v1.TaintEffectNoExecute,
|
||||
},
|
||||
expectTolerated: false,
|
||||
},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
if tolerated := toleratesTaint(&tc.toleration, &tc.taint); tc.expectTolerated != tolerated {
|
||||
t.Errorf("[%s] expect %v, got %v: toleration %+v, taint %s", tc.description, tc.expectTolerated, tolerated, tc.toleration, tc.taint.ToString())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestFilterNoExecuteTaints(t *testing.T) {
|
||||
taints := []v1.Taint{
|
||||
{
|
||||
Key: "one",
|
||||
Value: "one",
|
||||
Effect: v1.TaintEffectNoExecute,
|
||||
},
|
||||
{
|
||||
Key: "two",
|
||||
Value: "two",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
taints = getNoScheduleTaints(taints)
|
||||
if len(taints) != 1 || taints[0].Key != "two" {
|
||||
t.Errorf("Filtering doesn't work. Got %v", taints)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user