1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-25 20:59:28 +01:00

Merge pull request #181 from jw-s/feature/respect-tolerations

Respect Node taints with tolerations (if exist)
This commit is contained in:
Kubernetes Prow Robot
2020-03-04 10:45:48 -08:00
committed by GitHub
4 changed files with 252 additions and 8 deletions

View File

@@ -19,10 +19,11 @@ package strategies
import (
"sort"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -161,7 +162,9 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
// upper bound on total number of pods/cpu/memory to be moved
var totalPods, totalCPU, totalMem float64
var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
for _, node := range lowNodes {
taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
nodeCapacity := node.node.Status.Capacity
if len(node.node.Status.Allocatable) > 0 {
nodeCapacity = node.node.Status.Allocatable
@@ -202,18 +205,18 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
sortPodsBasedOnPriority(evictablePods)
evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
} else {
// TODO: Remove this when we support only priority.
// Falling back to evicting pods based on priority.
klog.V(1).Infof("Evicting pods based on QoS")
klog.V(1).Infof("There are %v non-evictable pods on the node", len(node.nonRemovablePods))
// evict best effort pods
evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
// evict burstable pods
evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
// evict guaranteed pods
evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
}
nodepodCount[node.node] = currentPodsEvicted
podsEvicted = podsEvicted + nodepodCount[node.node]
@@ -232,15 +235,24 @@ func evictPods(inputPods []*v1.Pod,
totalCPU *float64,
totalMem *float64,
podsEvicted *int,
dryRun bool, maxPodsToEvict int) {
dryRun bool,
maxPodsToEvict int,
taintsOfLowNodes map[string][]v1.Taint) {
if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
for _, pod := range inputPods {
if maxPodsToEvict > 0 && *podsEvicted+1 > maxPodsToEvict {
break
}
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).Infof("Skipping eviction for Pod: %#v, doesn't tolerate node taint", pod.Name)
continue
}
cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)
success, err := evictions.EvictPod(client, pod, evictionPolicyGroupVersion, dryRun)
if !success {
klog.Warningf("Error when evicting pod: %#v (%#v)", pod.Name, err)

View File

@@ -23,12 +23,18 @@ import (
"reflect"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
@@ -327,3 +333,183 @@ func TestValidateThresholds(t *testing.T) {
}
}
}
func newFake(objects ...runtime.Object) *core.Fake {
scheme := runtime.NewScheme()
codecs := serializer.NewCodecFactory(scheme)
fake.AddToScheme(scheme)
o := core.NewObjectTracker(scheme, codecs.UniversalDecoder())
for _, obj := range objects {
if err := o.Add(obj); err != nil {
panic(err)
}
}
fakePtr := core.Fake{}
fakePtr.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
objs, err := o.List(
schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"},
action.GetNamespace(),
)
if err != nil {
return true, nil, err
}
obj := &v1.PodList{
Items: []v1.Pod{},
}
for _, pod := range objs.(*v1.PodList).Items {
podFieldSet := fields.Set(map[string]string{
"spec.nodeName": pod.Spec.NodeName,
"status.phase": string(pod.Status.Phase),
})
match := action.(core.ListAction).GetListRestrictions().Fields.Matches(podFieldSet)
if !match {
continue
}
obj.Items = append(obj.Items, *pod.DeepCopy())
}
return true, obj, nil
})
fakePtr.AddReactor("*", "*", core.ObjectReaction(o))
fakePtr.AddWatchReactor("*", core.DefaultWatchReactor(watch.NewFake(), nil))
return &fakePtr
}
func TestWithTaints(t *testing.T) {
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: api.StrategyParameters{
NodeResourceUtilizationThresholds: api.NodeResourceUtilizationThresholds{
Thresholds: api.ResourceThresholds{
v1.ResourcePods: 20,
},
TargetThresholds: api.ResourceThresholds{
v1.ResourcePods: 70,
},
},
},
}
n1 := test.BuildTestNode("n1", 2000, 3000, 10)
n2 := test.BuildTestNode("n2", 1000, 3000, 10)
n3 := test.BuildTestNode("n3", 1000, 3000, 10)
n3withTaints := n3.DeepCopy()
n3withTaints.Spec.Taints = []v1.Taint{
{
Key: "key",
Value: "value",
Effect: v1.TaintEffectNoSchedule,
},
}
podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name)
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
{
Key: "key",
Value: "value",
},
}
tests := []struct {
name string
nodes []*v1.Node
pods []*v1.Pod
evictionsExpected int
}{
{
name: "No taints",
nodes: []*v1.Node{n1, n2, n3},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name),
// Node 2 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n2.Name), 200, 0, n2.Name),
},
evictionsExpected: 1,
},
{
name: "No pod tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name),
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name),
},
evictionsExpected: 0,
},
{
name: "Pod which tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
podThatToleratesTaint,
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name),
},
evictionsExpected: 1,
},
}
for _, item := range tests {
t.Run(item.name, func(t *testing.T) {
var objs []runtime.Object
for _, node := range item.nodes {
objs = append(objs, node)
}
for _, pod := range item.pods {
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
objs = append(objs, pod)
}
fakePtr := newFake(objs...)
var evictionCounter int
fakePtr.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() != "eviction" || action.GetResource().Resource != "pods" {
return false, nil, nil
}
evictionCounter++
return true, nil, nil
})
ds := &options.DeschedulerServer{
Client: &fake.Clientset{Fake: *fakePtr},
DeschedulerConfiguration: componentconfig.DeschedulerConfiguration{
EvictLocalStoragePods: false,
},
}
nodePodCount := utils.InitializeNodePodCount(item.nodes)
LowNodeUtilization(ds, strategy, "policy/v1", item.nodes, nodePodCount)
if item.evictionsExpected != evictionCounter {
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, evictionCounter)
}
})
}
}

View File

@@ -2,10 +2,12 @@ package utils
import (
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
utilfeature "k8s.io/apiserver/pkg/util/feature"
"k8s.io/component-base/featuregate"
"k8s.io/klog"
)
const (
@@ -179,3 +181,15 @@ func maxResourceList(list, new v1.ResourceList) {
}
}
}
// PodToleratesTaints returns true if a pod tolerates one node's taints
func PodToleratesTaints(pod *v1.Pod, taintsOfNodes map[string][]v1.Taint) bool {
for nodeName, taintsForNode := range taintsOfNodes {
if len(pod.Spec.Tolerations) >= len(taintsForNode) && TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taintsForNode, nil) {
return true
}
klog.V(5).Infof("pod: %#v doesn't tolerate node %s's taints", pod.Name, nodeName)
}
return false
}

View File

@@ -19,7 +19,7 @@ package utils
import (
"fmt"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/selection"
"k8s.io/klog"
@@ -126,3 +126,35 @@ func NodeSelectorRequirementsAsSelector(nsm []v1.NodeSelectorRequirement) (label
}
return selector, nil
}
// TolerationsTolerateTaint checks if taint is tolerated by any of the tolerations.
func TolerationsTolerateTaint(tolerations []v1.Toleration, taint *v1.Taint) bool {
for i := range tolerations {
if tolerations[i].ToleratesTaint(taint) {
return true
}
}
return false
}
type taintsFilterFunc func(*v1.Taint) bool
// TolerationsTolerateTaintsWithFilter checks if given tolerations tolerates
// all the taints that apply to the filter in given taint list.
func TolerationsTolerateTaintsWithFilter(tolerations []v1.Toleration, taints []v1.Taint, applyFilter taintsFilterFunc) bool {
if len(taints) == 0 {
return true
}
for i := range taints {
if applyFilter != nil && !applyFilter(&taints[i]) {
continue
}
if !TolerationsTolerateTaint(tolerations, &taints[i]) {
return false
}
}
return true
}