
Merge pull request #175 from swatisehgal/dev/strategyTaintTol

Strategy to consider taints and tolerations in Descheduler
Authored by Kubernetes Prow Robot on 2019-11-05 13:28:39 -08:00, committed by GitHub
4 changed files with 445 additions and 3 deletions


@@ -158,7 +158,7 @@ $ kubectl create -f descheduler-job.yaml
## Policy and Strategies
Descheduler's policy is configurable and includes strategies to be enabled or disabled.
Four strategies, `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, `RemovePodsViolatingNodeAffinity` are currently implemented.
Five strategies, `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, `RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints` are currently implemented.
As part of the policy, the parameters associated with the strategies can be configured too.
By default, all strategies are enabled.
@@ -248,7 +248,17 @@ strategies:
nodeAffinityType:
- "requiredDuringSchedulingIgnoredDuringExecution"
```
### RemovePodsViolatingNodeTaints
This strategy makes sure that pods violating NoSchedule taints on nodes are removed. For example, suppose a pod "podA" has a toleration for the taint ``key=value:NoSchedule`` and is scheduled and running on the tainted node. If the node's taint is subsequently updated or removed, the taint is no longer satisfied by the pod's tolerations, and the pod will be evicted. The policy file should look like:
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemovePodsViolatingNodeTaints":
    enabled: true
```
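
To make the matching rule concrete, here is a minimal, self-contained Go sketch of the check this strategy applies to each pod on a tainted node; the `tolerates` helper is illustrative only and is not part of the descheduler API:

```go
package main

import (
	"fmt"

	v1 "k8s.io/api/core/v1"
)

// tolerates mirrors the strategy's matching rule: key and effect must
// match (an empty toleration key or effect acts as a wildcard), and the
// operator decides how the taint's value is compared.
func tolerates(tol v1.Toleration, taint v1.Taint) bool {
	if tol.Key != "" && tol.Key != taint.Key {
		return false
	}
	if tol.Effect != "" && tol.Effect != taint.Effect {
		return false
	}
	switch tol.Operator {
	case v1.TolerationOpExists:
		return true
	case "", v1.TolerationOpEqual: // an empty operator means Equal
		return tol.Value == taint.Value
	default:
		return false
	}
}

func main() {
	// podA's toleration matches the node taint key=value:NoSchedule ...
	taint := v1.Taint{Key: "key", Value: "value", Effect: v1.TaintEffectNoSchedule}
	tol := v1.Toleration{Key: "key", Operator: v1.TolerationOpEqual, Value: "value", Effect: v1.TaintEffectNoSchedule}
	fmt.Println(tolerates(tol, taint)) // true: podA stays

	// ... until the taint's value changes, at which point podA becomes a
	// candidate for eviction.
	taint.Value = "newvalue"
	fmt.Println(tolerates(tol, taint)) // false: podA is evicted
}
```

An empty toleration key or effect acts as a wildcard, and an empty operator is treated as `Equal`, mirroring the strategy implementation in this change.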
## Pod Evictions
When the descheduler decides to evict pods from a node, it employs the following general mechanism:
@@ -271,7 +281,6 @@ disruption budget (PDB). The pods are evicted by using eviction subresource to h
This roadmap is not in any particular order.
* Strategy to consider taints and tolerations
* Consideration of pod affinity
* Strategy to consider pod lifetime
* Strategy to consider number of pending pods


@@ -65,6 +65,6 @@ func Run(rs *options.DeschedulerServer) error {
strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], evictionPolicyGroupVersion, nodes, nodePodCount)
strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], evictionPolicyGroupVersion, nodes, nodePodCount)
strategies.RemovePodsViolatingNodeAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeAffinity"], evictionPolicyGroupVersion, nodes, nodePodCount)
strategies.RemovePodsViolatingNodeTaints(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeTaints"], evictionPolicyGroupVersion, nodes, nodePodCount)
return nil
}


@@ -0,0 +1,138 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
)
const (
TolerationOpExists v1.TolerationOperator = "Exists"
TolerationOpEqual v1.TolerationOperator = "Equal"
)
// RemovePodsViolatingNodeTaints evicts pods on the given nodes whose tolerations no longer satisfy the nodes' NoSchedule taints.
func RemovePodsViolatingNodeTaints(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node, nodePodCount nodePodEvictedCount) {
if !strategy.Enabled {
return
}
deletePodsViolatingNodeTaints(ds.Client, policyGroupVersion, nodes, ds.DryRun, nodePodCount, ds.MaxNoOfPodsToEvictPerNode, ds.EvictLocalStoragePods)
}
// deletePodsViolatingNodeTaints evicts pods that do not tolerate the NoSchedule taints on their node
func deletePodsViolatingNodeTaints(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool, nodePodCount nodePodEvictedCount, maxPodsToEvict int, evictLocalStoragePods bool) int {
podsEvicted := 0
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v\n", node.Name)
pods, err := podutil.ListEvictablePodsOnNode(client, node, evictLocalStoragePods)
if err != nil {
// no pods evicted, as an error was encountered while retrieving evictable pods
return 0
}
totalPods := len(pods)
for i := 0; i < totalPods; i++ {
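// Stop evicting from this node once the configured per-node cap is reached.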
if maxPodsToEvict > 0 && nodePodCount[node]+1 > maxPodsToEvict {
break
}
if !checkPodsSatisfyTolerations(pods[i], node) {
success, err := evictions.EvictPod(client, pods[i], policyGroupVersion, dryRun)
if !success {
klog.Errorf("Error when evicting pod: %#v (%#v)\n", pods[i].Name, err)
} else {
nodePodCount[node]++
klog.V(1).Infof("Evicted pod: %#v (%#v)", pods[i].Name, err)
}
}
}
podsEvicted += nodePodCount[node]
}
return podsEvicted
}
// checkPodsSatisfyTolerations checks whether the node's NoSchedule taints are still satisfied by the pod's tolerations.
func checkPodsSatisfyTolerations(pod *v1.Pod, node *v1.Node) bool {
tolerations := pod.Spec.Tolerations
taints := node.Spec.Taints
if len(taints) == 0 {
return true
}
noScheduleTaints := getNoScheduleTaints(taints)
if !allTaintsTolerated(noScheduleTaints, tolerations) {
klog.V(2).Infof("Not all taints are tolerated after update for Pod %v on node %v", pod.Name, node.Name)
return false
}
return true
}
// getNoScheduleTaints returns only the NoSchedule taints from the slice of taints it receives.
func getNoScheduleTaints(taints []v1.Taint) []v1.Taint {
result := []v1.Taint{}
for i := range taints {
if taints[i].Effect == v1.TaintEffectNoSchedule {
result = append(result, taints[i])
}
}
return result
}
// toleratesTaint returns true if a toleration tolerates a taint, or false otherwise
func toleratesTaint(toleration *v1.Toleration, taint *v1.Taint) bool {
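// An empty toleration key matches any taint key, and an empty toleration effect matches any taint effect.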
if (len(toleration.Key) > 0 && toleration.Key != taint.Key) ||
(len(toleration.Effect) > 0 && toleration.Effect != taint.Effect) {
return false
}
switch toleration.Operator {
// empty operator means Equal
case "", TolerationOpEqual:
return toleration.Value == taint.Value
case TolerationOpExists:
return true
default:
return false
}
}
// allTaintsTolerated returns true if every taint is matched by at least one toleration, or false otherwise.
func allTaintsTolerated(taints []v1.Taint, tolerations []v1.Toleration) bool {
if len(taints) == 0 {
return true
}
if len(tolerations) == 0 {
return false
}
for i := range taints {
tolerated := false
for j := range tolerations {
if toleratesTaint(&tolerations[j], &taints[i]) {
tolerated = true
break
}
}
if !tolerated {
return false
}
}
return true
}


@@ -0,0 +1,295 @@
package strategies
import (
"fmt"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/test"
"testing"
)
func createNoScheduleTaint(key, value string, index int) v1.Taint {
return v1.Taint{
Key: key + fmt.Sprintf("%v", index),
Value: value + fmt.Sprintf("%v", index),
Effect: v1.TaintEffectNoSchedule,
}
}
func addTaintsToNode(node *v1.Node, key, value string, indices []int) *v1.Node {
taints := []v1.Taint{}
for _, index := range indices {
taints = append(taints, createNoScheduleTaint(key, value, index))
}
node.Spec.Taints = taints
return node
}
func addTolerationToPod(pod *v1.Pod, key, value string, index int) *v1.Pod {
if pod.Annotations == nil {
pod.Annotations = map[string]string{}
}
pod.Spec.Tolerations = []v1.Toleration{{Key: key + fmt.Sprintf("%v", index), Value: value + fmt.Sprintf("%v", index), Effect: v1.TaintEffectNoSchedule}}
return pod
}
func TestDeletePodsViolatingNodeTaints(t *testing.T) {
node1 := test.BuildTestNode("n1", 2000, 3000, 10)
node1 = addTaintsToNode(node1, "testTaint", "test", []int{1})
node2 := test.BuildTestNode("n2", 2000, 3000, 10)
node2 = addTaintsToNode(node2, "testingTaint", "testing", []int{1})
p1 := test.BuildTestPod("p1", 100, 0, node1.Name)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name)
p3 := test.BuildTestPod("p3", 100, 0, node1.Name)
p4 := test.BuildTestPod("p4", 100, 0, node1.Name)
p5 := test.BuildTestPod("p5", 100, 0, node1.Name)
p6 := test.BuildTestPod("p6", 100, 0, node1.Name)
p1.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p2.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p3.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p4.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p5.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p7 := test.BuildTestPod("p7", 100, 0, node2.Name)
p8 := test.BuildTestPod("p8", 100, 0, node2.Name)
p9 := test.BuildTestPod("p9", 100, 0, node2.Name)
p10 := test.BuildTestPod("p10", 100, 0, node2.Name)
p11 := test.BuildTestPod("p11", 100, 0, node2.Name)
p11.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
// The following 4 pods won't get evicted.
// A Critical Pod.
p7.Namespace = "kube-system"
p7.Annotations = test.GetCriticalPodAnnotation()
// A daemonset.
p8.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
// A pod with local storage.
p9.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p9.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
p10.Annotations = test.GetMirrorPodAnnotation()
p1 = addTolerationToPod(p1, "testTaint", "test", 1)
p3 = addTolerationToPod(p3, "testTaint", "test", 1)
p4 = addTolerationToPod(p4, "testTaintX", "testX", 1)
tests := []struct {
description string
nodes []*v1.Node
pods []v1.Pod
evictLocalStoragePods bool
npe nodePodEvictedCount
maxPodsToEvict int
expectedEvictedPodCount int
}{
{
description: "Pods not tolerating node taint should be evicted",
pods: []v1.Pod{*p1, *p2, *p3},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
npe: nodePodEvictedCount{node1: 0},
maxPodsToEvict: 0,
expectedEvictedPodCount: 1, // p2 gets evicted
},
{
description: "Pods with tolerations but not tolerating node taint should be evicted",
pods: []v1.Pod{*p1, *p3, *p4},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
npe: nodePodEvictedCount{node1: 0},
maxPodsToEvict: 0,
expectedEvictedPodCount: 1, // p4 gets evicted
},
{
description: "Only <maxPodsToEvict> number of Pods not tolerating node taint should be evicted",
pods: []v1.Pod{*p1, *p5, *p6},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
npe: nodePodEvictedCount{node1: 0},
maxPodsToEvict: 1,
expectedEvictedPodCount: 1, // p5 or p6 gets evicted
},
{
description: "Critical pods not tolerating node taint should not be evicted",
pods: []v1.Pod{*p7, *p8, *p9, *p10},
nodes: []*v1.Node{node2},
evictLocalStoragePods: false,
npe: nodePodEvictedCount{node2: 0},
maxPodsToEvict: 0,
expectedEvictedPodCount: 0,
},
{
description: "Critical pods except storage pods not tolerating node taint should not be evicted",
pods: []v1.Pod{*p7, *p8, *p9, *p10},
nodes: []*v1.Node{node2},
evictLocalStoragePods: true,
npe: nodePodEvictedCount{node2: 0},
maxPodsToEvict: 0,
expectedEvictedPodCount: 1,
},
{
description: "Critical and non critical pods, only non critical pods not tolerating node taint should be evicted",
pods: []v1.Pod{*p7, *p8, *p10, *p11},
nodes: []*v1.Node{node2},
evictLocalStoragePods: false,
npe: nodePodEvictedCount{node2: 0},
maxPodsToEvict: 0,
expectedEvictedPodCount: 1,
},
}
for _, tc := range tests {
// create fake client
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: tc.pods}, nil
})
actualEvictedPodCount := deletePodsViolatingNodeTaints(fakeClient, "v1", tc.nodes, false, tc.npe, tc.maxPodsToEvict, tc.evictLocalStoragePods)
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount)
}
}
}
func TestToleratesTaint(t *testing.T) {
testCases := []struct {
description string
toleration v1.Toleration
taint v1.Taint
expectTolerated bool
}{
{
description: "toleration and taint have the same key and effect, and operator is Exists, and taint has no value, expect tolerated",
toleration: v1.Toleration{
Key: "foo",
Operator: TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
},
taint: v1.Taint{
Key: "foo",
Effect: v1.TaintEffectNoSchedule,
},
expectTolerated: true,
},
{
description: "toleration and taint have the same key and effect, and operator is Exists, and taint has some value, expect tolerated",
toleration: v1.Toleration{
Key: "foo",
Operator: TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
},
taint: v1.Taint{
Key: "foo",
Value: "bar",
Effect: v1.TaintEffectNoSchedule,
},
expectTolerated: true,
},
{
description: "toleration and taint have the same effect, toleration has empty key and operator is Exists, means match all taints, expect tolerated",
toleration: v1.Toleration{
Key: "",
Operator: TolerationOpExists,
Effect: v1.TaintEffectNoSchedule,
},
taint: v1.Taint{
Key: "foo",
Value: "bar",
Effect: v1.TaintEffectNoSchedule,
},
expectTolerated: true,
},
{
description: "toleration and taint have the same key, effect and value, and operator is Equal, expect tolerated",
toleration: v1.Toleration{
Key: "foo",
Operator: TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectNoSchedule,
},
taint: v1.Taint{
Key: "foo",
Value: "bar",
Effect: v1.TaintEffectNoSchedule,
},
expectTolerated: true,
},
{
description: "toleration and taint have the same key and effect, but different values, and operator is Equal, expect not tolerated",
toleration: v1.Toleration{
Key: "foo",
Operator: TolerationOpEqual,
Value: "value1",
Effect: v1.TaintEffectNoSchedule,
},
taint: v1.Taint{
Key: "foo",
Value: "value2",
Effect: v1.TaintEffectNoSchedule,
},
expectTolerated: false,
},
{
description: "toleration and taint have the same key and value, but different effects, and operator is Equal, expect not tolerated",
toleration: v1.Toleration{
Key: "foo",
Operator: TolerationOpEqual,
Value: "bar",
Effect: v1.TaintEffectNoSchedule,
},
taint: v1.Taint{
Key: "foo",
Value: "bar",
Effect: v1.TaintEffectNoExecute,
},
expectTolerated: false,
},
}
for _, tc := range testCases {
if tolerated := toleratesTaint(&tc.toleration, &tc.taint); tc.expectTolerated != tolerated {
t.Errorf("[%s] expect %v, got %v: toleration %+v, taint %s", tc.description, tc.expectTolerated, tolerated, tc.toleration, tc.taint.ToString())
}
}
}
func TestGetNoScheduleTaints(t *testing.T) {
taints := []v1.Taint{
{
Key: "one",
Value: "one",
Effect: v1.TaintEffectNoExecute,
},
{
Key: "two",
Value: "two",
Effect: v1.TaintEffectNoSchedule,
},
}
taints = getNoScheduleTaints(taints)
if len(taints) != 1 || taints[0].Key != "two" {
t.Errorf("Filtering doesn't work. Got %v", taints)
}
}