diff --git a/examples/policy.yaml b/examples/policy.yaml index c20ac0729..38fd0d8f3 100644 --- a/examples/policy.yaml +++ b/examples/policy.yaml @@ -3,6 +3,8 @@ kind: "DeschedulerPolicy" strategies: "RemoveDuplicates": enabled: true + "RemovePodsViolatingInterPodAntiAffinity": + enabled: true "LowNodeUtilization": enabled: true params: diff --git a/pkg/descheduler/descheduler.go b/pkg/descheduler/descheduler.go index 8aa756b0d..7e7b33456 100644 --- a/pkg/descheduler/descheduler.go +++ b/pkg/descheduler/descheduler.go @@ -55,6 +55,7 @@ func Run(rs *options.DeschedulerServer) error { strategies.RemoveDuplicatePods(rs, deschedulerPolicy.Strategies["RemoveDuplicates"], evictionPolicyGroupVersion, nodes) strategies.LowNodeUtilization(rs, deschedulerPolicy.Strategies["LowNodeUtilization"], evictionPolicyGroupVersion, nodes) + strategies.RemovePodsViolatingInterPodAntiAffinity(rs, deschedulerPolicy.Strategies["RemovePodsViolatingInterPodAntiAffinity"], evictionPolicyGroupVersion, nodes) return nil } diff --git a/pkg/descheduler/strategies/pod_antiaffinity.go b/pkg/descheduler/strategies/pod_antiaffinity.go new file mode 100644 index 000000000..2beb300f0 --- /dev/null +++ b/pkg/descheduler/strategies/pod_antiaffinity.go @@ -0,0 +1,102 @@ +/* +Copyright 2017 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/
+
+package strategies
+
+import (
+	"github.com/kubernetes-incubator/descheduler/cmd/descheduler/app/options"
+	"github.com/kubernetes-incubator/descheduler/pkg/api"
+	"k8s.io/kubernetes/pkg/api/v1"
+
+	"github.com/golang/glog"
+	"github.com/kubernetes-incubator/descheduler/pkg/descheduler/evictions"
+	podutil "github.com/kubernetes-incubator/descheduler/pkg/descheduler/pod"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	//TODO: Change to client-go instead of generated clientset.
+	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset"
+	priorityutil "k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/priorities/util"
+	"k8s.io/kubernetes/plugin/pkg/scheduler/schedulercache"
+)
+
+// RemovePodsViolatingInterPodAntiAffinity evicts pods whose required anti-affinity terms are
+// matched by another pod on the same node. It is a no-op unless the strategy is enabled.
+func RemovePodsViolatingInterPodAntiAffinity(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, policyGroupVersion string, nodes []*v1.Node) {
+	if !strategy.Enabled {
+		return
+	}
+	removePodsWithAffinityRules(ds.Client, policyGroupVersion, nodes, ds.DryRun)
+}
+
+// removePodsWithAffinityRules evicts pods whose required anti-affinity terms match another pod
+// returned by ListPodsOnANode for the same node, and returns how many were evicted. A node whose
+// pods cannot be listed is logged and skipped; the terms' TopologyKey is not consulted.
+func removePodsWithAffinityRules(client clientset.Interface, policyGroupVersion string, nodes []*v1.Node, dryRun bool) int {
+	podsEvicted := 0
+	for _, node := range nodes {
+		glog.V(1).Infof("Processing node: %#v\n", node.Name)
+		pods, err := podutil.ListPodsOnANode(client, node)
+		if err != nil {
+			glog.Errorf("Error listing pods on node %q: %v", node.Name, err)
+			continue
+		}
+		for i := 0; i < len(pods); i++ {
+			if !checkPodsWithAntiAffinityExist(pods[i], pods) {
+				continue
+			}
+			success, err := evictions.EvictPod(client, pods[i], policyGroupVersion, dryRun)
+			if !success {
+				glog.Infof("Error when evicting pod: %#v (%v)\n", pods[i].Name, err)
+				continue
+			}
+			podsEvicted++
+			glog.V(1).Infof("Evicted pod: %#v because of existing anti-affinity\n", pods[i].Name)
+			// Remove the evicted pod so pods conflicting only with it stay; i-- revisits the shifted slot.
+			pods = append(pods[:i], pods[i+1:]...)
+			i--
+		}
+	}
+	return podsEvicted
+}
+
+// checkPodsWithAntiAffinityExist reports whether some other pod in pods matches a required anti-affinity term of pod.
+func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
+	affinity := schedulercache.ReconcileAffinity(pod)
+	if affinity == nil || affinity.PodAntiAffinity == nil {
+		return false
+	}
+	for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
+		namespaces := priorityutil.GetNamespacesFromPodAffinityTerm(pod, &term)
+		selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
+		if err != nil {
+			glog.Infof("%v", err)
+			return false
+		}
+		for _, existingPod := range pods {
+			if (existingPod.Namespace != pod.Namespace || existingPod.Name != pod.Name) && priorityutil.PodMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
+				return true
+			}
+		}
+	}
+	return false
+}
+
+// getPodAntiAffinityTerms returns the required-during-scheduling anti-affinity terms, or nil for a nil input.
+func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
+	if podAntiAffinity != nil {
+		terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
+	}
+	return terms
+}
diff --git a/pkg/descheduler/strategies/pod_antiaffinity_test.go b/pkg/descheduler/strategies/pod_antiaffinity_test.go
new file mode 100644
index 000000000..62286e7e5
--- /dev/null
+++ b/pkg/descheduler/strategies/pod_antiaffinity_test.go
@@ -0,0 +1,86 @@
+/*
+Copyright 2017 The Kubernetes Authors.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package strategies
+
+import (
+	"testing"
+
+	"fmt"
+	"github.com/kubernetes-incubator/descheduler/test"
+	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
+	"k8s.io/apimachinery/pkg/runtime"
+	core "k8s.io/client-go/testing"
+	"k8s.io/kubernetes/pkg/api/v1"
+	"k8s.io/kubernetes/pkg/client/clientset_generated/clientset/fake"
+)
+
+// TestPodAntiAffinity builds three pods on a single node. p1 and p3 each
+// declare a required anti-affinity term selecting foo=bar, and p3 is the only
+// pod labelled foo=bar. The test passes when removePodsWithAffinityRules
+// reports exactly one eviction.
+func TestPodAntiAffinity(t *testing.T) {
+	// newAntiAffinity returns a fresh required anti-affinity rule against
+	// pods labelled foo=bar, so that the pods do not share one struct.
+	newAntiAffinity := func() *v1.Affinity {
+		term := v1.PodAffinityTerm{
+			LabelSelector: &metav1.LabelSelector{
+				MatchExpressions: []metav1.LabelSelectorRequirement{
+					{
+						Key:      "foo",
+						Operator: metav1.LabelSelectorOpIn,
+						Values:   []string{"bar"},
+					},
+				},
+			},
+			TopologyKey: "region",
+		}
+		return &v1.Affinity{
+			PodAntiAffinity: &v1.PodAntiAffinity{
+				RequiredDuringSchedulingIgnoredDuringExecution: []v1.PodAffinityTerm{term},
+			},
+		}
+	}
+
+	node := test.BuildTestNode("n1", 2000, 3000, 10)
+	podA := test.BuildTestPod("p1", 100, 0, node.Name)
+	podB := test.BuildTestPod("p2", 100, 0, node.Name)
+	podC := test.BuildTestPod("p3", 100, 0, node.Name)
+	// p3 is the only pod carrying the label the anti-affinity terms select.
+	podC.Labels = map[string]string{"foo": "bar"}
+	// p1 and p3 get separate but identical anti-affinity rules; p2 has none.
+	podA.Spec.Affinity = newAntiAffinity()
+	podC.Spec.Affinity = newAntiAffinity()
+
+	// Fake API: listing pods yields the three pods; getting a node yields node.
+	fakeClient := &fake.Clientset{}
+	fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
+		podList := &v1.PodList{Items: []v1.Pod{*podA, *podB, *podC}}
+		return true, podList, nil
+	})
+	fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
+		return true, node, nil
+	})
+
+	// Run with dryRun=false and compare the reported eviction count.
+	expectedEvictedPodCount := 1
+	podsEvicted := removePodsWithAffinityRules(fakeClient, "v1", []*v1.Node{node}, false)
+	if podsEvicted == expectedEvictedPodCount {
+		return
+	}
+	fmt.Println(podsEvicted)
+	t.Errorf("Unexpected no of pods evicted")
+}