mirror of
https://github.com/kubernetes-sigs/descheduler.git
synced 2026-01-25 20:59:28 +01:00
Merge pull request #181 from jw-s/feature/respect-tolerations
Respect Node taints with tolerations (if exist)
This commit is contained in:
@@ -19,10 +19,11 @@ package strategies
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog"
|
||||
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
@@ -161,7 +162,9 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
|
||||
|
||||
// upper bound on total number of pods/cpu/memory to be moved
|
||||
var totalPods, totalCPU, totalMem float64
|
||||
var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
|
||||
for _, node := range lowNodes {
|
||||
taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
|
||||
nodeCapacity := node.node.Status.Capacity
|
||||
if len(node.node.Status.Allocatable) > 0 {
|
||||
nodeCapacity = node.node.Status.Allocatable
|
||||
@@ -202,18 +205,18 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
|
||||
|
||||
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
|
||||
sortPodsBasedOnPriority(evictablePods)
|
||||
evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, ¤tPodsEvicted, dryRun, maxPodsToEvict)
|
||||
evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, ¤tPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
|
||||
} else {
|
||||
// TODO: Remove this when we support only priority.
|
||||
// Falling back to evicting pods based on priority.
|
||||
klog.V(1).Infof("Evicting pods based on QoS")
|
||||
klog.V(1).Infof("There are %v non-evictable pods on the node", len(node.nonRemovablePods))
|
||||
// evict best effort pods
|
||||
evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, ¤tPodsEvicted, dryRun, maxPodsToEvict)
|
||||
evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, ¤tPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
|
||||
// evict burstable pods
|
||||
evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, ¤tPodsEvicted, dryRun, maxPodsToEvict)
|
||||
evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, ¤tPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
|
||||
// evict guaranteed pods
|
||||
evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, ¤tPodsEvicted, dryRun, maxPodsToEvict)
|
||||
evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, ¤tPodsEvicted, dryRun, maxPodsToEvict, taintsOfLowNodes)
|
||||
}
|
||||
nodepodCount[node.node] = currentPodsEvicted
|
||||
podsEvicted = podsEvicted + nodepodCount[node.node]
|
||||
@@ -232,15 +235,24 @@ func evictPods(inputPods []*v1.Pod,
|
||||
totalCPU *float64,
|
||||
totalMem *float64,
|
||||
podsEvicted *int,
|
||||
dryRun bool, maxPodsToEvict int) {
|
||||
dryRun bool,
|
||||
maxPodsToEvict int,
|
||||
taintsOfLowNodes map[string][]v1.Taint) {
|
||||
if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
|
||||
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
|
||||
for _, pod := range inputPods {
|
||||
if maxPodsToEvict > 0 && *podsEvicted+1 > maxPodsToEvict {
|
||||
break
|
||||
}
|
||||
|
||||
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
|
||||
klog.V(3).Infof("Skipping eviction for Pod: %#v, doesn't tolerate node taint", pod.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
|
||||
mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)
|
||||
|
||||
success, err := evictions.EvictPod(client, pod, evictionPolicyGroupVersion, dryRun)
|
||||
if !success {
|
||||
klog.Warningf("Error when evicting pod: %#v (%#v)", pod.Name, err)
|
||||
|
||||
@@ -23,12 +23,18 @@ import (
|
||||
|
||||
"reflect"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/fields"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
|
||||
"k8s.io/apimachinery/pkg/watch"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
@@ -327,3 +333,183 @@ func TestValidateThresholds(t *testing.T) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newFake(objects ...runtime.Object) *core.Fake {
|
||||
scheme := runtime.NewScheme()
|
||||
codecs := serializer.NewCodecFactory(scheme)
|
||||
fake.AddToScheme(scheme)
|
||||
o := core.NewObjectTracker(scheme, codecs.UniversalDecoder())
|
||||
for _, obj := range objects {
|
||||
if err := o.Add(obj); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
fakePtr := core.Fake{}
|
||||
fakePtr.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
objs, err := o.List(
|
||||
schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
|
||||
schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"},
|
||||
action.GetNamespace(),
|
||||
)
|
||||
if err != nil {
|
||||
return true, nil, err
|
||||
}
|
||||
|
||||
obj := &v1.PodList{
|
||||
Items: []v1.Pod{},
|
||||
}
|
||||
for _, pod := range objs.(*v1.PodList).Items {
|
||||
podFieldSet := fields.Set(map[string]string{
|
||||
"spec.nodeName": pod.Spec.NodeName,
|
||||
"status.phase": string(pod.Status.Phase),
|
||||
})
|
||||
match := action.(core.ListAction).GetListRestrictions().Fields.Matches(podFieldSet)
|
||||
if !match {
|
||||
continue
|
||||
}
|
||||
obj.Items = append(obj.Items, *pod.DeepCopy())
|
||||
}
|
||||
return true, obj, nil
|
||||
})
|
||||
fakePtr.AddReactor("*", "*", core.ObjectReaction(o))
|
||||
fakePtr.AddWatchReactor("*", core.DefaultWatchReactor(watch.NewFake(), nil))
|
||||
|
||||
return &fakePtr
|
||||
}
|
||||
|
||||
func TestWithTaints(t *testing.T) {
|
||||
strategy := api.DeschedulerStrategy{
|
||||
Enabled: true,
|
||||
Params: api.StrategyParameters{
|
||||
NodeResourceUtilizationThresholds: api.NodeResourceUtilizationThresholds{
|
||||
Thresholds: api.ResourceThresholds{
|
||||
v1.ResourcePods: 20,
|
||||
},
|
||||
TargetThresholds: api.ResourceThresholds{
|
||||
v1.ResourcePods: 70,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
n1 := test.BuildTestNode("n1", 2000, 3000, 10)
|
||||
n2 := test.BuildTestNode("n2", 1000, 3000, 10)
|
||||
n3 := test.BuildTestNode("n3", 1000, 3000, 10)
|
||||
n3withTaints := n3.DeepCopy()
|
||||
n3withTaints.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: "key",
|
||||
Value: "value",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
|
||||
podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name)
|
||||
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: "key",
|
||||
Value: "value",
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
nodes []*v1.Node
|
||||
pods []*v1.Pod
|
||||
evictionsExpected int
|
||||
}{
|
||||
{
|
||||
name: "No taints",
|
||||
nodes: []*v1.Node{n1, n2, n3},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name),
|
||||
// Node 2 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n2.Name), 200, 0, n2.Name),
|
||||
},
|
||||
evictionsExpected: 1,
|
||||
},
|
||||
{
|
||||
name: "No pod tolerates node taint",
|
||||
nodes: []*v1.Node{n1, n3withTaints},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name),
|
||||
// Node 3 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name),
|
||||
},
|
||||
evictionsExpected: 0,
|
||||
},
|
||||
{
|
||||
name: "Pod which tolerates node taint",
|
||||
nodes: []*v1.Node{n1, n3withTaints},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name),
|
||||
podThatToleratesTaint,
|
||||
// Node 3 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name),
|
||||
},
|
||||
evictionsExpected: 1,
|
||||
},
|
||||
}
|
||||
|
||||
for _, item := range tests {
|
||||
t.Run(item.name, func(t *testing.T) {
|
||||
var objs []runtime.Object
|
||||
for _, node := range item.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
|
||||
for _, pod := range item.pods {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
|
||||
fakePtr := newFake(objs...)
|
||||
var evictionCounter int
|
||||
fakePtr.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
if action.GetSubresource() != "eviction" || action.GetResource().Resource != "pods" {
|
||||
return false, nil, nil
|
||||
}
|
||||
evictionCounter++
|
||||
return true, nil, nil
|
||||
})
|
||||
|
||||
ds := &options.DeschedulerServer{
|
||||
Client: &fake.Clientset{Fake: *fakePtr},
|
||||
DeschedulerConfiguration: componentconfig.DeschedulerConfiguration{
|
||||
EvictLocalStoragePods: false,
|
||||
},
|
||||
}
|
||||
|
||||
nodePodCount := utils.InitializeNodePodCount(item.nodes)
|
||||
LowNodeUtilization(ds, strategy, "policy/v1", item.nodes, nodePodCount)
|
||||
|
||||
if item.evictionsExpected != evictionCounter {
|
||||
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, evictionCounter)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2,10 +2,12 @@ package utils
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
utilfeature "k8s.io/apiserver/pkg/util/feature"
|
||||
"k8s.io/component-base/featuregate"
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -179,3 +181,15 @@ func maxResourceList(list, new v1.ResourceList) {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// PodToleratesTaints returns true if a pod tolerates one node's taints
|
||||
func PodToleratesTaints(pod *v1.Pod, taintsOfNodes map[string][]v1.Taint) bool {
|
||||
for nodeName, taintsForNode := range taintsOfNodes {
|
||||
if len(pod.Spec.Tolerations) >= len(taintsForNode) && TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, taintsForNode, nil) {
|
||||
return true
|
||||
}
|
||||
klog.V(5).Infof("pod: %#v doesn't tolerate node %s's taints", pod.Name, nodeName)
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
@@ -19,7 +19,7 @@ package utils
|
||||
import (
|
||||
"fmt"
|
||||
|
||||
"k8s.io/api/core/v1"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/selection"
|
||||
"k8s.io/klog"
|
||||
@@ -126,3 +126,35 @@ func NodeSelectorRequirementsAsSelector(nsm []v1.NodeSelectorRequirement) (label
|
||||
}
|
||||
return selector, nil
|
||||
}
|
||||
|
||||
// TolerationsTolerateTaint checks if taint is tolerated by any of the tolerations.
|
||||
func TolerationsTolerateTaint(tolerations []v1.Toleration, taint *v1.Taint) bool {
|
||||
for i := range tolerations {
|
||||
if tolerations[i].ToleratesTaint(taint) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
type taintsFilterFunc func(*v1.Taint) bool
|
||||
|
||||
// TolerationsTolerateTaintsWithFilter checks if given tolerations tolerates
|
||||
// all the taints that apply to the filter in given taint list.
|
||||
func TolerationsTolerateTaintsWithFilter(tolerations []v1.Toleration, taints []v1.Taint, applyFilter taintsFilterFunc) bool {
|
||||
if len(taints) == 0 {
|
||||
return true
|
||||
}
|
||||
|
||||
for i := range taints {
|
||||
if applyFilter != nil && !applyFilter(&taints[i]) {
|
||||
continue
|
||||
}
|
||||
|
||||
if !TolerationsTolerateTaint(tolerations, &taints[i]) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user