1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-25 20:59:28 +01:00

add RemovePodsViolatingNodeAffinity strategy

This commit adds requiredDuringSchedulingIgnoredDuringExecution
for RemovePodsViolatingNodeAffinity strategy.

Also adds unit tests and documentation.
This commit is contained in:
Shubham Minglani
2018-01-12 14:59:56 +05:30
parent 08729f6ef9
commit 40bb490f4c
10 changed files with 638 additions and 1 deletions

View File

@@ -162,7 +162,7 @@ $ kubectl create -f descheduler-job.yaml
## Policy and Strategies
Descheduler's policy is configurable and includes strategies to be enabled or disabled.
Three strategies, `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity` are currently implemented.
Four strategies, `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, `RemovePodsViolatingNodeAffinity` are currently implemented.
As part of the policy, the parameters associated with the strategies can be configured too.
By default, all strategies are enabled.
@@ -238,6 +238,21 @@ strategies:
enabled: false
```
### RemovePodsViolatingNodeAffinity
This strategy makes sure that pods violating node affinity are removed from nodes. For example, there is podA that was scheduled on nodeA which satisfied the node affinity rule `requiredDuringSchedulingIgnoredDuringExecution` at the time of scheduling, but over time nodeA no longer satisfies the rule, then if another node nodeB is available that satisfies the node affinity rule, then podA will be evicted from nodeA. The policy file should like this -
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingNodeAffinity":
enabled: true
params:
nodeAffinityType:
- "requiredDuringSchedulingIgnoredDuringExecution"
```
## Pod Evictions
When the descheduler decides to evict pods from a node, it employs following general mechanism:

View File

@@ -0,0 +1,8 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingNodeAffinity":
enabled: true
params:
nodeAffinityType:
- "requiredDuringSchedulingIgnoredDuringExecution"

View File

@@ -47,6 +47,7 @@ type DeschedulerStrategy struct {
// Only one of its members may be specified
type StrategyParameters struct {
NodeResourceUtilizationThresholds NodeResourceUtilizationThresholds
NodeAffinityType []string
}
type Percentage float64

View File

@@ -47,6 +47,7 @@ type DeschedulerStrategy struct {
// Only one of its members may be specified
type StrategyParameters struct {
NodeResourceUtilizationThresholds NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"`
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
}
type Percentage float64

View File

@@ -63,6 +63,7 @@ func Run(rs *options.DeschedulerServer) error {
strategies.RemoveDuplicatePods(rs, deschedulerPolicy.Strategies["RemoveDuplicates"], evictionPolicyGroupVersion, nodes)
strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], evictionPolicyGroupVersion, nodes)
strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], evictionPolicyGroupVersion, nodes)
strategies.RemovePodsViolatingNodeAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeAffinity"], evictionPolicyGroupVersion, nodes)
return nil
}

View File

@@ -20,6 +20,7 @@ import (
"time"
"github.com/golang/glog"
"github.com/kubernetes-incubator/descheduler/pkg/utils"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
@@ -124,3 +125,43 @@ func IsNodeUschedulable(node *v1.Node) bool {
}
return false
}
// PodFitsAnyNode checks if the given pod fits any of the given nodes, based on
// multiple criteria, like, pod node selector matching the node label, node
// being schedulable or not.
func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
ok, err := utils.PodMatchNodeSelector(pod, node)
if err != nil || !ok {
continue
}
if ok {
if !IsNodeUschedulable(node) {
glog.V(2).Infof("Pod %v can possibly be scheduled on %v", pod.Name, node.Name)
return true
}
return false
}
}
return false
}
// PodFitsCurrentNode checks if the given pod fits on the given node if the pod
// node selector matches the node label.
func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool {
ok, err := utils.PodMatchNodeSelector(pod, node)
if err != nil {
glog.Error(err)
return false
}
if !ok {
glog.V(1).Infof("Pod %v does not fit on node %v", pod.Name, node.Name)
return false
}
glog.V(3).Infof("Pod %v fits on node %v", pod.Name, node.Name)
return true
}

View File

@@ -21,6 +21,7 @@ import (
"github.com/kubernetes-incubator/descheduler/test"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/kubernetes/fake"
)
@@ -102,3 +103,245 @@ func TestIsNodeUschedulable(t *testing.T) {
}
}
func TestPodFitsCurrentNode(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
tests := []struct {
description string
pod *v1.Pod
node *v1.Node
success bool
}{
{
description: "Pod with nodeAffinity set, expected to fit the node",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
success: true,
},
{
description: "Pod with nodeAffinity set, not expected to fit the node",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
success: false,
},
}
for _, tc := range tests {
actual := PodFitsCurrentNode(tc.pod, tc.node)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
}
}
func TestPodFitsAnyNode(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
tests := []struct {
description string
pod *v1.Pod
nodes []*v1.Node
success bool
}{
{
description: "Pod expected to fit one of the nodes",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
},
success: true,
},
{
description: "Pod expected to fit none of the nodes",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "unfit1",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "unfit2",
},
},
},
},
success: false,
},
{
description: "Nodes are unschedulable but labels match, should fail",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
Spec: v1.NodeSpec{
Unschedulable: true,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
},
success: false,
},
}
for _, tc := range tests {
actual := PodFitsAnyNode(tc.pod, tc.nodes)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
}
}

View File

@@ -0,0 +1,70 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"github.com/golang/glog"
"github.com/kubernetes-incubator/descheduler/cmd/descheduler/app/options"
"github.com/kubernetes-incubator/descheduler/pkg/api"
"github.com/kubernetes-incubator/descheduler/pkg/descheduler/evictions"
nodeutil "github.com/kubernetes-incubator/descheduler/pkg/descheduler/node"
podutil "github.com/kubernetes-incubator/descheduler/pkg/descheduler/pod"
"k8s.io/api/core/v1"
)
func RemovePodsViolatingNodeAffinity(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node) {
evictionCount := removePodsViolatingNodeAffinityCount(ds, strategy, evictionPolicyGroupVersion, nodes)
glog.V(1).Infof("Evicted %v pods", evictionCount)
}
func removePodsViolatingNodeAffinityCount(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node) int {
evictedPodCount := 0
if !strategy.Enabled {
return evictedPodCount
}
for _, nodeAffinity := range strategy.Params.NodeAffinityType {
glog.V(2).Infof("Executing for nodeAffinityType: %v", nodeAffinity)
switch nodeAffinity {
case "requiredDuringSchedulingIgnoredDuringExecution":
for _, node := range nodes {
glog.V(1).Infof("Processing node: %#v\n", node.Name)
pods, err := podutil.ListEvictablePodsOnNode(ds.Client, node)
if err != nil {
glog.Errorf("failed to get pods from %v: %v", node.Name, err)
}
for _, pod := range pods {
if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
if !nodeutil.PodFitsCurrentNode(pod, node) && nodeutil.PodFitsAnyNode(pod, nodes) {
glog.V(1).Infof("Evicting pod: %v", pod.Name)
evictions.EvictPod(ds.Client, pod, evictionPolicyGroupVersion, false)
evictedPodCount++
}
}
}
}
default:
glog.Errorf("invalid nodeAffinityType: %v", nodeAffinity)
return evictedPodCount
}
}
return evictedPodCount
}

View File

@@ -0,0 +1,163 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"testing"
"github.com/kubernetes-incubator/descheduler/cmd/descheduler/app/options"
"github.com/kubernetes-incubator/descheduler/pkg/api"
"github.com/kubernetes-incubator/descheduler/test"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
)
func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
requiredDuringSchedulingIgnoredDuringExecutionStrategy := api.DeschedulerStrategy{
Enabled: true,
Params: api.StrategyParameters{
NodeAffinityType: []string{
"requiredDuringSchedulingIgnoredDuringExecution",
},
},
}
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
nodeWithLabels := test.BuildTestNode("nodeWithLabels", 2000, 3000, 10)
nodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue
nodeWithoutLabels := test.BuildTestNode("nodeWithoutLabels", 2000, 3000, 10)
unschedulableNodeWithLabels := test.BuildTestNode("unschedulableNodeWithLabels", 2000, 3000, 10)
nodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue
unschedulableNodeWithLabels.Spec.Unschedulable = true
addPodsToNode := func(node *v1.Node) []v1.Pod {
podWithNodeAffinity := test.BuildTestPod("podWithNodeAffinity", 100, 0, node.Name)
podWithNodeAffinity.Spec.Affinity = &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
}
pod1 := test.BuildTestPod("pod1", 100, 0, node.Name)
pod2 := test.BuildTestPod("pod2", 100, 0, node.Name)
podWithNodeAffinity.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod1.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod2.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
return []v1.Pod{
*podWithNodeAffinity,
*pod1,
*pod2,
}
}
tests := []struct {
description string
nodes []*v1.Node
pods []v1.Pod
strategy api.DeschedulerStrategy
expectedEvictedPodCount int
}{
{
description: "Strategy disabled, should not evict any pods",
strategy: api.DeschedulerStrategy{
Enabled: false,
Params: api.StrategyParameters{
NodeAffinityType: []string{
"requiredDuringSchedulingIgnoredDuringExecution",
},
},
},
expectedEvictedPodCount: 0,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
},
{
description: "Invalid strategy type, should not evict any pods",
strategy: api.DeschedulerStrategy{
Enabled: true,
Params: api.StrategyParameters{
NodeAffinityType: []string{
"requiredDuringSchedulingRequiredDuringExecution",
},
},
},
expectedEvictedPodCount: 0,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
},
{
description: "Pod is correctly scheduled on node, no eviction expected",
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
expectedEvictedPodCount: 0,
pods: addPodsToNode(nodeWithLabels),
nodes: []*v1.Node{nodeWithLabels},
},
{
description: "Pod is scheduled on node without matching labels, another schedulable node available, should be evicted",
expectedEvictedPodCount: 1,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
},
{
description: "Pod is scheduled on node without matching labels, but no node where pod fits is available, should not evict",
expectedEvictedPodCount: 0,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithoutLabels, unschedulableNodeWithLabels},
},
}
for _, tc := range tests {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: tc.pods}, nil
})
ds := options.DeschedulerServer{
Client: fakeClient,
}
actualEvictedPodCount := removePodsViolatingNodeAffinityCount(&ds, tc.strategy, "v1", tc.nodes)
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
}
}
}

94
pkg/utils/predicates.go Normal file
View File

@@ -0,0 +1,94 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package utils
import (
"fmt"
"github.com/golang/glog"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/labels"
v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper"
)
// The following code has been copied from predicates package to avoid the
// huge vendoring issues, mostly copied from
// k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/
// Some minor changes have been made to ease the imports, but most of the code
// remains untouched
// PodMatchNodeSelector checks if a pod node selector matches the node label.
func PodMatchNodeSelector(pod *v1.Pod, node *v1.Node) (bool, error) {
if node == nil {
return false, fmt.Errorf("node not found")
}
if podMatchesNodeLabels(pod, node) {
return true, nil
}
return false, nil
}
// The pod can only schedule onto nodes that satisfy requirements in both NodeAffinity and nodeSelector.
func podMatchesNodeLabels(pod *v1.Pod, node *v1.Node) bool {
// Check if node.Labels match pod.Spec.NodeSelector.
if len(pod.Spec.NodeSelector) > 0 {
selector := labels.SelectorFromSet(pod.Spec.NodeSelector)
if !selector.Matches(labels.Set(node.Labels)) {
return false
}
}
// 1. nil NodeSelector matches all nodes (i.e. does not filter out any nodes)
// 2. nil []NodeSelectorTerm (equivalent to non-nil empty NodeSelector) matches no nodes
// 3. zero-length non-nil []NodeSelectorTerm matches no nodes also, just for simplicity
// 4. nil []NodeSelectorRequirement (equivalent to non-nil empty NodeSelectorTerm) matches no nodes
// 5. zero-length non-nil []NodeSelectorRequirement matches no nodes also, just for simplicity
// 6. non-nil empty NodeSelectorRequirement is not allowed
affinity := pod.Spec.Affinity
if affinity != nil && affinity.NodeAffinity != nil {
nodeAffinity := affinity.NodeAffinity
// if no required NodeAffinity requirements, will do no-op, means select all nodes.
if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil {
return true
}
// Match node selector for requiredDuringSchedulingIgnoredDuringExecution.
if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms
glog.V(10).Infof("Match for RequiredDuringSchedulingIgnoredDuringExecution node selector terms %+v", nodeSelectorTerms)
return nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms)
}
}
return true
}
// nodeMatchesNodeSelectorTerms checks if a node's labels satisfy a list of node selector terms,
// terms are ORed, and an empty list of terms will match nothing.
func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSelectorTerm) bool {
for _, req := range nodeSelectorTerms {
nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(req.MatchExpressions)
if err != nil {
glog.V(10).Infof("Failed to parse MatchExpressions: %+v, regarding as not match.", req.MatchExpressions)
return false
}
if nodeSelector.Matches(labels.Set(node.Labels)) {
return true
}
}
return false
}