From 40bb490f4cd2f5b1ee218e03c25c6df223ae8313 Mon Sep 17 00:00:00 2001 From: Shubham Minglani Date: Fri, 12 Jan 2018 14:59:56 +0530 Subject: [PATCH] add RemovePodsViolatingNodeAffinity strategy This commit adds requiredDuringSchedulingIgnoredDuringExecution for RemovePodsViolatingNodeAffinity strategy. Also adds unit tests and documentation. --- README.md | 17 +- examples/node-affinity.yml | 8 + pkg/api/types.go | 1 + pkg/api/v1alpha1/types.go | 1 + pkg/descheduler/descheduler.go | 1 + pkg/descheduler/node/node.go | 41 +++ pkg/descheduler/node/node_test.go | 243 ++++++++++++++++++ pkg/descheduler/strategies/node_affinity.go | 70 +++++ .../strategies/node_affinity_test.go | 163 ++++++++++++ pkg/utils/predicates.go | 94 +++++++ 10 files changed, 638 insertions(+), 1 deletion(-) create mode 100644 examples/node-affinity.yml create mode 100644 pkg/descheduler/strategies/node_affinity.go create mode 100644 pkg/descheduler/strategies/node_affinity_test.go create mode 100644 pkg/utils/predicates.go diff --git a/README.md b/README.md index a73e68875..61d7a3d0e 100644 --- a/README.md +++ b/README.md @@ -162,7 +162,7 @@ $ kubectl create -f descheduler-job.yaml ## Policy and Strategies Descheduler's policy is configurable and includes strategies to be enabled or disabled. -Three strategies, `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity` are currently implemented. +Four strategies, `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`, `RemovePodsViolatingNodeAffinity` are currently implemented. As part of the policy, the parameters associated with the strategies can be configured too. By default, all strategies are enabled. @@ -238,6 +238,21 @@ strategies: enabled: false ``` +### RemovePodsViolatingNodeAffinity + +This strategy makes sure that pods violating node affinity are removed from nodes. 
For example, there is podA that was scheduled on nodeA which satisfied the node affinity rule `requiredDuringSchedulingIgnoredDuringExecution` at the time of scheduling, but over time nodeA no longer satisfies the rule, then if another node nodeB is available that satisfies the node affinity rule, then podA will be evicted from nodeA. The policy file should look like this - + +``` +apiVersion: "descheduler/v1alpha1" +kind: "DeschedulerPolicy" +strategies: + "RemovePodsViolatingNodeAffinity": + enabled: true + params: + nodeAffinityType: + - "requiredDuringSchedulingIgnoredDuringExecution" +``` + ## Pod Evictions When the descheduler decides to evict pods from a node, it employs following general mechanism: diff --git a/examples/node-affinity.yml b/examples/node-affinity.yml new file mode 100644 index 000000000..c8421006d --- /dev/null +++ b/examples/node-affinity.yml @@ -0,0 +1,8 @@ +apiVersion: "descheduler/v1alpha1" +kind: "DeschedulerPolicy" +strategies: + "RemovePodsViolatingNodeAffinity": + enabled: true + params: + nodeAffinityType: + - "requiredDuringSchedulingIgnoredDuringExecution" diff --git a/pkg/api/types.go b/pkg/api/types.go index 1f99f42a0..5b1d1c30c 100644 --- a/pkg/api/types.go +++ b/pkg/api/types.go @@ -47,6 +47,7 @@ type DeschedulerStrategy struct { // Only one of its members may be specified type StrategyParameters struct { NodeResourceUtilizationThresholds NodeResourceUtilizationThresholds + NodeAffinityType []string } type Percentage float64 diff --git a/pkg/api/v1alpha1/types.go b/pkg/api/v1alpha1/types.go index 97034544f..1ca0cac9b 100644 --- a/pkg/api/v1alpha1/types.go +++ b/pkg/api/v1alpha1/types.go @@ -47,6 +47,7 @@ type DeschedulerStrategy struct { // Only one of its members may be specified type StrategyParameters struct { NodeResourceUtilizationThresholds NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"` + NodeAffinityType []string `json:"nodeAffinityType,omitempty"` } type Percentage float64 diff --git 
a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index f90516d84..2e45ef419 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -63,6 +63,7 @@ func Run(rs *options.DeschedulerServer) error { strategies.RemoveDuplicatePods(rs, deschedulerPolicy.Strategies["RemoveDuplicates"], evictionPolicyGroupVersion, nodes) strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], evictionPolicyGroupVersion, nodes) strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], evictionPolicyGroupVersion, nodes) + strategies.RemovePodsViolatingNodeAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingNodeAffinity"], evictionPolicyGroupVersion, nodes) return nil } diff --git a/pkg/descheduler/node/node.go b/pkg/descheduler/node/node.go index ccbb78877..101714864 100644 --- a/pkg/descheduler/node/node.go +++ b/pkg/descheduler/node/node.go @@ -20,6 +20,7 @@ import ( "time" "github.com/golang/glog" + "github.com/kubernetes-incubator/descheduler/pkg/utils" "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/fields" @@ -124,3 +125,43 @@ func IsNodeUschedulable(node *v1.Node) bool { } return false } + +// PodFitsAnyNode checks if the given pod fits any of the given nodes, based on +// multiple criteria, like, pod node selector matching the node label, node +// being schedulable or not. +func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool { + for _, node := range nodes { + + ok, err := utils.PodMatchNodeSelector(pod, node) + if err != nil || !ok { + continue + } + if ok { + if !IsNodeUschedulable(node) { + glog.V(2).Infof("Pod %v can possibly be scheduled on %v", pod.Name, node.Name) + return true + } + return false + } + } + return false +} + +// PodFitsCurrentNode checks if the given pod fits on the given node if the pod +// node selector matches the node label. 
+func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool { + ok, err := utils.PodMatchNodeSelector(pod, node) + + if err != nil { + glog.Error(err) + return false + } + + if !ok { + glog.V(1).Infof("Pod %v does not fit on node %v", pod.Name, node.Name) + return false + } + + glog.V(3).Infof("Pod %v fits on node %v", pod.Name, node.Name) + return true +} diff --git a/pkg/descheduler/node/node_test.go b/pkg/descheduler/node/node_test.go index 543ef18e8..44e922ace 100644 --- a/pkg/descheduler/node/node_test.go +++ b/pkg/descheduler/node/node_test.go @@ -21,6 +21,7 @@ import ( "github.com/kubernetes-incubator/descheduler/test" "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/client-go/kubernetes/fake" ) @@ -102,3 +103,245 @@ func TestIsNodeUschedulable(t *testing.T) { } } + +func TestPodFitsCurrentNode(t *testing.T) { + + nodeLabelKey := "kubernetes.io/desiredNode" + nodeLabelValue := "yes" + + tests := []struct { + description string + pod *v1.Pod + node *v1.Node + success bool + }{ + { + description: "Pod with nodeAffinity set, expected to fit the node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: nodeLabelKey, + Operator: "In", + Values: []string{ + nodeLabelValue, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + nodeLabelKey: nodeLabelValue, + }, + }, + }, + success: true, + }, + { + description: "Pod with nodeAffinity set, not expected to fit the node", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: 
nodeLabelKey, + Operator: "In", + Values: []string{ + nodeLabelValue, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + node: &v1.Node{ + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + nodeLabelKey: "no", + }, + }, + }, + success: false, + }, + } + + for _, tc := range tests { + actual := PodFitsCurrentNode(tc.pod, tc.node) + if actual != tc.success { + t.Errorf("Test %#v failed", tc.description) + } + } +} + +func TestPodFitsAnyNode(t *testing.T) { + + nodeLabelKey := "kubernetes.io/desiredNode" + nodeLabelValue := "yes" + + tests := []struct { + description string + pod *v1.Pod + nodes []*v1.Node + success bool + }{ + { + description: "Pod expected to fit one of the nodes", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: nodeLabelKey, + Operator: "In", + Values: []string{ + nodeLabelValue, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + nodeLabelKey: nodeLabelValue, + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + nodeLabelKey: "no", + }, + }, + }, + }, + success: true, + }, + { + description: "Pod expected to fit none of the nodes", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: nodeLabelKey, + Operator: "In", + Values: []string{ + nodeLabelValue, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + nodeLabelKey: "unfit1", + }, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Labels: 
map[string]string{ + nodeLabelKey: "unfit2", + }, + }, + }, + }, + success: false, + }, + { + description: "Nodes are unschedulable but labels match, should fail", + pod: &v1.Pod{ + Spec: v1.PodSpec{ + Affinity: &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: nodeLabelKey, + Operator: "In", + Values: []string{ + nodeLabelValue, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + nodes: []*v1.Node{ + { + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + nodeLabelKey: nodeLabelValue, + }, + }, + Spec: v1.NodeSpec{ + Unschedulable: true, + }, + }, + { + ObjectMeta: metav1.ObjectMeta{ + Labels: map[string]string{ + nodeLabelKey: "no", + }, + }, + }, + }, + success: false, + }, + } + + for _, tc := range tests { + actual := PodFitsAnyNode(tc.pod, tc.nodes) + if actual != tc.success { + t.Errorf("Test %#v failed", tc.description) + } + } +} diff --git a/pkg/descheduler/strategies/node_affinity.go b/pkg/descheduler/strategies/node_affinity.go new file mode 100644 index 000000000..a61437dec --- /dev/null +++ b/pkg/descheduler/strategies/node_affinity.go @@ -0,0 +1,70 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package strategies + +import ( + "github.com/golang/glog" + "github.com/kubernetes-incubator/descheduler/cmd/descheduler/app/options" + "github.com/kubernetes-incubator/descheduler/pkg/api" + "github.com/kubernetes-incubator/descheduler/pkg/descheduler/evictions" + nodeutil "github.com/kubernetes-incubator/descheduler/pkg/descheduler/node" + podutil "github.com/kubernetes-incubator/descheduler/pkg/descheduler/pod" + "k8s.io/api/core/v1" +) + +func RemovePodsViolatingNodeAffinity(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node) { + evictionCount := removePodsViolatingNodeAffinityCount(ds, strategy, evictionPolicyGroupVersion, nodes) + glog.V(1).Infof("Evicted %v pods", evictionCount) +} + +func removePodsViolatingNodeAffinityCount(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node) int { + evictedPodCount := 0 + if !strategy.Enabled { + return evictedPodCount + } + + for _, nodeAffinity := range strategy.Params.NodeAffinityType { + glog.V(2).Infof("Executing for nodeAffinityType: %v", nodeAffinity) + + switch nodeAffinity { + case "requiredDuringSchedulingIgnoredDuringExecution": + for _, node := range nodes { + glog.V(1).Infof("Processing node: %#v\n", node.Name) + + pods, err := podutil.ListEvictablePodsOnNode(ds.Client, node) + if err != nil { + glog.Errorf("failed to get pods from %v: %v", node.Name, err) + } + + for _, pod := range pods { + if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { + + if !nodeutil.PodFitsCurrentNode(pod, node) && nodeutil.PodFitsAnyNode(pod, nodes) { + glog.V(1).Infof("Evicting pod: %v", pod.Name) + evictions.EvictPod(ds.Client, pod, evictionPolicyGroupVersion, false) + evictedPodCount++ + } + } + } + } + default: + glog.Errorf("invalid nodeAffinityType: %v", nodeAffinity) + return 
evictedPodCount + } + } + return evictedPodCount +} diff --git a/pkg/descheduler/strategies/node_affinity_test.go b/pkg/descheduler/strategies/node_affinity_test.go new file mode 100644 index 000000000..05db8c556 --- /dev/null +++ b/pkg/descheduler/strategies/node_affinity_test.go @@ -0,0 +1,163 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package strategies + +import ( + "testing" + + "github.com/kubernetes-incubator/descheduler/cmd/descheduler/app/options" + "github.com/kubernetes-incubator/descheduler/pkg/api" + "github.com/kubernetes-incubator/descheduler/test" + "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/runtime" + "k8s.io/client-go/kubernetes/fake" + core "k8s.io/client-go/testing" +) + +func TestRemovePodsViolatingNodeAffinity(t *testing.T) { + + requiredDuringSchedulingIgnoredDuringExecutionStrategy := api.DeschedulerStrategy{ + Enabled: true, + Params: api.StrategyParameters{ + NodeAffinityType: []string{ + "requiredDuringSchedulingIgnoredDuringExecution", + }, + }, + } + + nodeLabelKey := "kubernetes.io/desiredNode" + nodeLabelValue := "yes" + nodeWithLabels := test.BuildTestNode("nodeWithLabels", 2000, 3000, 10) + nodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue + + nodeWithoutLabels := test.BuildTestNode("nodeWithoutLabels", 2000, 3000, 10) + + unschedulableNodeWithLabels := test.BuildTestNode("unschedulableNodeWithLabels", 2000, 3000, 10) + nodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue + 
unschedulableNodeWithLabels.Spec.Unschedulable = true + + addPodsToNode := func(node *v1.Node) []v1.Pod { + podWithNodeAffinity := test.BuildTestPod("podWithNodeAffinity", 100, 0, node.Name) + podWithNodeAffinity.Spec.Affinity = &v1.Affinity{ + NodeAffinity: &v1.NodeAffinity{ + RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{ + NodeSelectorTerms: []v1.NodeSelectorTerm{ + { + MatchExpressions: []v1.NodeSelectorRequirement{ + { + Key: nodeLabelKey, + Operator: "In", + Values: []string{ + nodeLabelValue, + }, + }, + }, + }, + }, + }, + }, + } + + pod1 := test.BuildTestPod("pod1", 100, 0, node.Name) + pod2 := test.BuildTestPod("pod2", 100, 0, node.Name) + + podWithNodeAffinity.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod1.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + pod2.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() + + return []v1.Pod{ + *podWithNodeAffinity, + *pod1, + *pod2, + } + } + + tests := []struct { + description string + nodes []*v1.Node + pods []v1.Pod + strategy api.DeschedulerStrategy + expectedEvictedPodCount int + }{ + { + description: "Strategy disabled, should not evict any pods", + strategy: api.DeschedulerStrategy{ + Enabled: false, + Params: api.StrategyParameters{ + NodeAffinityType: []string{ + "requiredDuringSchedulingIgnoredDuringExecution", + }, + }, + }, + expectedEvictedPodCount: 0, + pods: addPodsToNode(nodeWithoutLabels), + nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, + }, + { + description: "Invalid strategy type, should not evict any pods", + strategy: api.DeschedulerStrategy{ + Enabled: true, + Params: api.StrategyParameters{ + NodeAffinityType: []string{ + "requiredDuringSchedulingRequiredDuringExecution", + }, + }, + }, + expectedEvictedPodCount: 0, + pods: addPodsToNode(nodeWithoutLabels), + nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, + }, + { + description: "Pod is correctly scheduled on node, no eviction expected", + strategy: 
requiredDuringSchedulingIgnoredDuringExecutionStrategy, + expectedEvictedPodCount: 0, + pods: addPodsToNode(nodeWithLabels), + nodes: []*v1.Node{nodeWithLabels}, + }, + { + description: "Pod is scheduled on node without matching labels, another schedulable node available, should be evicted", + expectedEvictedPodCount: 1, + strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy, + pods: addPodsToNode(nodeWithoutLabels), + nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels}, + }, + { + description: "Pod is scheduled on node without matching labels, but no node where pod fits is available, should not evict", + expectedEvictedPodCount: 0, + strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy, + pods: addPodsToNode(nodeWithoutLabels), + nodes: []*v1.Node{nodeWithoutLabels, unschedulableNodeWithLabels}, + }, + } + + for _, tc := range tests { + + fakeClient := &fake.Clientset{} + fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) { + return true, &v1.PodList{Items: tc.pods}, nil + }) + + ds := options.DeschedulerServer{ + Client: fakeClient, + } + + actualEvictedPodCount := removePodsViolatingNodeAffinityCount(&ds, tc.strategy, "v1", tc.nodes) + if actualEvictedPodCount != tc.expectedEvictedPodCount { + t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount) + } + } +} diff --git a/pkg/utils/predicates.go b/pkg/utils/predicates.go new file mode 100644 index 000000000..def8d1f7a --- /dev/null +++ b/pkg/utils/predicates.go @@ -0,0 +1,94 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package utils + +import ( + "fmt" + + "github.com/golang/glog" + "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/labels" + v1helper "k8s.io/kubernetes/pkg/apis/core/v1/helper" +) + +// The following code has been copied from predicates package to avoid the +// huge vendoring issues, mostly copied from +// k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/ +// Some minor changes have been made to ease the imports, but most of the code +// remains untouched + +// PodMatchNodeSelector checks if a pod node selector matches the node label. +func PodMatchNodeSelector(pod *v1.Pod, node *v1.Node) (bool, error) { + if node == nil { + return false, fmt.Errorf("node not found") + } + if podMatchesNodeLabels(pod, node) { + return true, nil + } + return false, nil +} + +// The pod can only schedule onto nodes that satisfy requirements in both NodeAffinity and nodeSelector. +func podMatchesNodeLabels(pod *v1.Pod, node *v1.Node) bool { + // Check if node.Labels match pod.Spec.NodeSelector. + if len(pod.Spec.NodeSelector) > 0 { + selector := labels.SelectorFromSet(pod.Spec.NodeSelector) + if !selector.Matches(labels.Set(node.Labels)) { + return false + } + } + + // 1. nil NodeSelector matches all nodes (i.e. does not filter out any nodes) + // 2. nil []NodeSelectorTerm (equivalent to non-nil empty NodeSelector) matches no nodes + // 3. zero-length non-nil []NodeSelectorTerm matches no nodes also, just for simplicity + // 4. nil []NodeSelectorRequirement (equivalent to non-nil empty NodeSelectorTerm) matches no nodes + // 5. 
zero-length non-nil []NodeSelectorRequirement matches no nodes also, just for simplicity + // 6. non-nil empty NodeSelectorRequirement is not allowed + + affinity := pod.Spec.Affinity + if affinity != nil && affinity.NodeAffinity != nil { + nodeAffinity := affinity.NodeAffinity + // if no required NodeAffinity requirements, will do no-op, means select all nodes. + if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution == nil { + return true + } + + // Match node selector for requiredDuringSchedulingIgnoredDuringExecution. + if nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil { + nodeSelectorTerms := nodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution.NodeSelectorTerms + glog.V(10).Infof("Match for RequiredDuringSchedulingIgnoredDuringExecution node selector terms %+v", nodeSelectorTerms) + return nodeMatchesNodeSelectorTerms(node, nodeSelectorTerms) + } + } + return true +} + +// nodeMatchesNodeSelectorTerms checks if a node's labels satisfy a list of node selector terms, +// terms are ORed, and an empty list of terms will match nothing. +func nodeMatchesNodeSelectorTerms(node *v1.Node, nodeSelectorTerms []v1.NodeSelectorTerm) bool { + for _, req := range nodeSelectorTerms { + nodeSelector, err := v1helper.NodeSelectorRequirementsAsSelector(req.MatchExpressions) + if err != nil { + glog.V(10).Infof("Failed to parse MatchExpressions: %+v, regarding as not match.", req.MatchExpressions) + return false + } + if nodeSelector.Matches(labels.Set(node.Labels)) { + return true + } + } + return false +}