1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 13:29:11 +01:00

Compare commits

..

17 Commits

Author SHA1 Message Date
RaviSantosh Gudimetla
468e138070 Merge pull request #114 from nikhita/contributing.md
Add CONTRIBUTING.md
2018-09-03 09:15:37 -04:00
Nikhita Raghunath
db13b2ac73 Add CONTRIBUTING.md 2018-09-01 19:29:24 +05:30
Avesh Agarwal
40ca53e0a5 Merge pull request #113 from sanity-io/master
Yaml files for kubernetes
2018-08-24 08:50:41 -04:00
nicholas.klem
35d8367fe5 rbac.authorization.k8s.io/v1 - not v1beta1 2018-08-24 14:44:19 +02:00
nicholas.klem
345dd9cf27 add kubernetes yaml files 2018-08-24 14:34:38 +02:00
RaviSantosh Gudimetla
81f471fe05 Merge pull request #111 from kubernetes-incubator/ravisantoshgudimetla-patch-2
Remove production usage warning
2018-08-23 15:57:09 -04:00
RaviSantosh Gudimetla
aa5e8770f5 Remove production usage warning
Removing production usage warning to encourage more users to try using descheduler and since descheduler has been stable so far.
2018-08-23 15:44:17 -04:00
RaviSantosh Gudimetla
2690d139c5 Merge pull request #110 from kubernetes-incubator/ravisantoshgudimetla-patch-1
Update the compatibility matrix
2018-08-22 16:31:09 -04:00
RaviSantosh Gudimetla
cd192ce5fc Update the compatibility matrix
Descheduler 0.4+ should work with kube 1.9+.
2018-08-22 16:09:18 -04:00
RaviSantosh Gudimetla
048f3fd1e5 Merge pull request #109 from ravisantoshgudimetla/test-cases-cleanup
Remove the unnecessary print statements in test file
2018-08-22 12:20:27 -04:00
ravisantoshgudimetla
a079fd2757 Remove the unnecessary print statements 2018-08-22 11:27:40 -04:00
RaviSantosh Gudimetla
ae0a9ed525 Merge pull request #108 from ravisantoshgudimetla/fix-warnings-ci
Fix deprecated warning in CI
2018-08-21 15:10:40 -04:00
ravisantoshgudimetla
0a815e8786 Fix deprecated warning in CI 2018-08-21 14:45:14 -04:00
RaviSantosh Gudimetla
0115748fe8 Merge pull request #105 from ravisantoshgudimetla/priority-low-node
Low node utilization to respect priority while evicting pods
2018-08-21 14:33:50 -04:00
ravisantoshgudimetla
d0305dac3f Low node utilization to respect priority while evicting pods 2018-08-21 14:14:26 -04:00
RaviSantosh Gudimetla
72d6a8aa33 Merge pull request #106 from ravisantoshgudimetla/fix-e2e
Fix broken e2e tests
2018-08-06 10:53:13 -04:00
ravisantoshgudimetla
654fdbba94 Fix broken e2e tests 2018-08-05 13:16:47 -04:00
9 changed files with 333 additions and 17 deletions

23
CONTRIBUTING.md Normal file
View File

@@ -0,0 +1,23 @@
# Contributing Guidelines
Welcome to Kubernetes. We are excited about the prospect of you joining our [community](https://github.com/kubernetes/community)! The Kubernetes community abides by the CNCF [code of conduct](code-of-conduct.md). Here is an excerpt:
_As contributors and maintainers of this project, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities._
## Getting Started
We have full documentation on how to get started contributing here:
- [Contributor License Agreement](https://git.k8s.io/community/CLA.md) - Kubernetes projects require that you sign a Contributor License Agreement (CLA) before we can accept your pull requests.
- [Kubernetes Contributor Guide](http://git.k8s.io/community/contributors/guide) - Main contributor documentation, or you can just jump directly to the [contributing section](http://git.k8s.io/community/contributors/guide#contributing)
- [Contributor Cheat Sheet](https://git.k8s.io/community/contributors/guide/contributor-cheatsheet.md) - Common resources for existing developers
## Mentorship
- [Mentoring Initiatives](https://git.k8s.io/community/mentoring) - We have a diverse set of mentorship programs available that are always looking for volunteers!
## Contact Information
- [Slack channel](https://kubernetes.slack.com/messages/sig-scheduling)
- [Mailing list](https://groups.google.com/forum/#!forum/kubernetes-sig-scheduling)

View File

@@ -285,11 +285,18 @@ This roadmap is not in any particular order.
Descheduler | supported Kubernetes version
-------------|-----------------------------
0.4 | 1.9+
0.4+ | 1.9+
0.1-0.3 | 1.7-1.8
## Note
## Community, discussion, contribution, and support
This project is under active development, and is not intended for production use.
Any api could be changed any time with out any notice. That said, your feedback is
very important and appreciated to make this project more stable and useful.
Learn how to engage with the Kubernetes community on the [community page](http://kubernetes.io/community/).
You can reach the maintainers of this project at:
- [Slack channel](https://kubernetes.slack.com/messages/sig-scheduling)
- [Mailing list](https://groups.google.com/forum/#!forum/kubernetes-sig-scheduling)
### Code of conduct
Participation in the Kubernetes community is governed by the [Kubernetes Code of Conduct](code-of-conduct.md).

View File

@@ -10,7 +10,7 @@ master_uuid=$(uuid)
node1_uuid=$(uuid)
node2_uuid=$(uuid)
kube_apiserver_port=6443
kube_version=1.9.4
kube_version=1.11.1
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/../../
E2E_GCE_HOME=$DESCHEDULER_ROOT/hack/e2e-gce
@@ -39,7 +39,7 @@ generate_kubeadm_instance_files() {
master_public_ip=$(gcloud compute instances list | grep $master_uuid|awk '{print $5}')
node1_public_ip=$(gcloud compute instances list | grep $node1_uuid|awk '{print $5}')
node2_public_ip=$(gcloud compute instances list | grep $node2_uuid|awk '{print $5}')
echo "kubeadm init --kubernetes-version=${kube_version} --apiserver-advertise-address=${master_public_ip}" --skip-preflight-checks --pod-network-cidr=10.96.0.0/12 > $E2E_GCE_HOME/kubeadm_install.sh
echo "kubeadm init --kubernetes-version=${kube_version} --apiserver-advertise-address=${master_public_ip}" --ignore-preflight-errors=all --pod-network-cidr=10.96.0.0/12 > $E2E_GCE_HOME/kubeadm_install.sh
}

26
kubernetes/configmap.yaml Normal file
View File

@@ -0,0 +1,26 @@
# ConfigMap holding the descheduler policy file (policy.yaml).
# It lives in kube-system because the descheduler Job runs there and a
# ConfigMap volume can only reference a ConfigMap in the pod's own namespace.
apiVersion: v1
kind: ConfigMap
metadata:
  name: descheduler-policy-configmap
  namespace: kube-system
data:
  # Enables the RemoveDuplicates, RemovePodsViolatingInterPodAntiAffinity and
  # LowNodeUtilization strategies. The percentages below are the
  # LowNodeUtilization thresholds / targetThresholds for cpu, memory and pods.
  policy.yaml: |
    apiVersion: "descheduler/v1alpha1"
    kind: "DeschedulerPolicy"
    strategies:
      "RemoveDuplicates":
        enabled: true
      "RemovePodsViolatingInterPodAntiAffinity":
        enabled: true
      "LowNodeUtilization":
        enabled: true
        params:
          nodeResourceUtilizationThresholds:
            thresholds:
              "cpu" : 20
              "memory": 20
              "pods": 20
            targetThresholds:
              "cpu" : 50
              "memory": 50
              "pods": 50

33
kubernetes/job.yaml Normal file
View File

@@ -0,0 +1,33 @@
# Job that runs the descheduler binary once in the kube-system namespace.
apiVersion: batch/v1
kind: Job
metadata:
name: descheduler-job
namespace: kube-system
spec:
# A single pod, run to completion once.
parallelism: 1
completions: 1
template:
metadata:
name: descheduler-pod
annotations:
# NOTE(review): presumably marks the descheduler pod itself as critical so it is not evicted — confirm.
scheduler.alpha.kubernetes.io/critical-pod: ""
spec:
containers:
- name: descheduler
image: descheduler:latest
volumeMounts:
# The policy ConfigMap is mounted here; --policy-config-file below points into this directory.
- mountPath: /policy-dir
name: policy-volume
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
# Log verbosity level.
- "--v"
- "3"
# Do not restart the container once it exits.
restartPolicy: "Never"
# Service account defined in rbac.yaml.
serviceAccountName: descheduler-sa
volumes:
- name: policy-volume
configMap:
# Must match metadata.name in configmap.yaml.
name: descheduler-policy-configmap

37
kubernetes/rbac.yaml Normal file
View File

@@ -0,0 +1,37 @@
---
# ClusterRole with the permissions the descheduler needs: read nodes,
# read/delete pods, and create pod evictions.
# ClusterRole is cluster-scoped, so it carries no namespace.
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
  name: descheduler-cluster-role
rules:
- apiGroups: [""]
  resources: ["nodes"]
  verbs: ["get", "watch", "list"]
- apiGroups: [""]
  resources: ["pods"]
  verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
  resources: ["pods/eviction"]
  verbs: ["create"]
---
# Service account the descheduler Job runs as.
apiVersion: v1
kind: ServiceAccount
metadata:
  name: descheduler-sa
  namespace: kube-system
---
# Binds the ClusterRole above to the descheduler service account.
# ClusterRoleBinding is cluster-scoped, so it carries no namespace.
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
  name: descheduler-cluster-role-binding
roleRef:
  apiGroup: rbac.authorization.k8s.io
  kind: ClusterRole
  name: descheduler-cluster-role
subjects:
- name: descheduler-sa
  kind: ServiceAccount
  namespace: kube-system

View File

@@ -41,6 +41,7 @@ type NodeUsageMap struct {
bPods []*v1.Pod
gPods []*v1.Pod
}
// NodePodsMap maps a node to a slice of pods.
// NOTE(review): presumably the pods listed for that node — confirm against createNodePodsMap.
type NodePodsMap map[*v1.Node][]*v1.Pod
func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerStrategy, evictionPolicyGroupVersion string, nodes []*v1.Node, nodepodCount nodePodEvictedCount) {
@@ -59,7 +60,7 @@ func LowNodeUtilization(ds *options.DeschedulerServer, strategy api.DeschedulerS
return
}
npm := CreateNodePodsMap(ds.Client, nodes)
npm := createNodePodsMap(ds.Client, nodes)
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
glog.V(1).Infof("Criteria for a node under utilization: CPU: %v, Mem: %v, Pods: %v",
@@ -151,6 +152,9 @@ func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThr
return lowNodes, targetNodes
}
// evictPodsFromTargetNodes evicts pods based on priority when the pods on the node have a priority set;
// otherwise it falls back to evicting them based on QoS.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVersion string, targetNodes, lowNodes []NodeUsageMap, targetThresholds api.ResourceThresholds, dryRun bool, maxPodsToEvict int, nodepodCount nodePodEvictedCount) int {
podsEvicted := 0
@@ -191,12 +195,27 @@ func evictPodsFromTargetNodes(client clientset.Interface, evictionPolicyGroupVer
glog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
currentPodsEvicted := nodepodCount[node.node]
// evict best effort pods
evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// evict burstable pods
evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// evict guaranteed pods
evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// Check if one pod has priority, if yes, assume that all pods have priority and evict pods based on priority.
if node.allPods[0].Spec.Priority != nil {
glog.V(1).Infof("All pods have priority associated with them. Evicting pods based on priority")
evictablePods := make([]*v1.Pod, 0)
evictablePods = append(append(node.bPods, node.bePods...), node.gPods...)
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
sortPodsBasedOnPriority(evictablePods)
evictPods(evictablePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
} else {
// TODO: Remove this when we support only priority.
// Falling back to evicting pods based on QoS.
glog.V(1).Infof("Evicting pods based on QoS")
glog.V(1).Infof("There are %v non-evictable pods on the node", len(node.nonRemovablePods))
// evict best effort pods
evictPods(node.bePods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// evict burstable pods
evictPods(node.bPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
// evict guaranteed pods
evictPods(node.gPods, client, evictionPolicyGroupVersion, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCpu, &totalMem, &currentPodsEvicted, dryRun, maxPodsToEvict)
}
nodepodCount[node.node] = currentPodsEvicted
podsEvicted = podsEvicted + nodepodCount[node.node]
glog.V(1).Infof("%v pods evicted from node %#v with usage %v", nodepodCount[node.node], node.node.Name, node.usage)
@@ -269,7 +288,30 @@ func SortNodesByUsage(nodes []NodeUsageMap) {
})
}
func CreateNodePodsMap(client clientset.Interface, nodes []*v1.Node) NodePodsMap {
// sortPodsBasedOnPriority sorts evictablePods in place, lowest priority first.
// Pods without a priority sort before pods that have one. Pods whose priorities
// are equal (or both unset) are ordered by QoS tier: BestEffort, then Burstable,
// then Guaranteed.
func sortPodsBasedOnPriority(evictablePods []*v1.Pod) {
	// qosRank maps a pod's QoS tier to its position in the eviction order.
	qosRank := func(pod *v1.Pod) int {
		switch {
		case podutil.IsBestEffortPod(pod):
			return 0
		case podutil.IsBurstablePod(pod):
			return 1
		default:
			return 2
		}
	}
	sort.Slice(evictablePods, func(i, j int) bool {
		pi, pj := evictablePods[i].Spec.Priority, evictablePods[j].Spec.Priority
		switch {
		case pi == nil && pj != nil:
			return true
		case pi != nil && pj == nil:
			return false
		case pi != nil && pj != nil && *pi != *pj:
			return *pi < *pj
		}
		// Same priority (or both unset): compare QoS ranks with a strict "<" so
		// that two pods of the same tier are never each reported as less than
		// the other, which sort.Slice requires of its less function.
		return qosRank(evictablePods[i]) < qosRank(evictablePods[j])
	})
}
// createNodePodsMap returns nodepodsmap with evictable pods on node.
func createNodePodsMap(client clientset.Interface, nodes []*v1.Node) NodePodsMap {
npm := NodePodsMap{}
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(client, node)
@@ -308,6 +350,7 @@ func IsNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds
return true
}
// NodeUtilization returns the current usage of node.
func NodeUtilization(node *v1.Node, pods []*v1.Pod) (api.ResourceThresholds, []*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) {
bePods := []*v1.Pod{}
nonRemovablePods := []*v1.Pod{}

View File

@@ -28,10 +28,11 @@ import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"reflect"
)
// TODO: Make this table driven.
func TestLowNodeUtilization(t *testing.T) {
func TestLowNodeUtilizationWithoutPriority(t *testing.T) {
var thresholds = make(api.ResourceThresholds)
var targetThresholds = make(api.ResourceThresholds)
thresholds[v1.ResourceCPU] = 30
@@ -110,7 +111,7 @@ func TestLowNodeUtilization(t *testing.T) {
return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
})
expectedPodsEvicted := 3
npm := CreateNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
npm := createNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
if len(lowNodes) != 1 {
t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
@@ -126,6 +127,151 @@ func TestLowNodeUtilization(t *testing.T) {
}
// TestLowNodeUtilizationWithPriorities builds one heavily loaded node (n1) whose
// pods carry priorities, one lightly loaded node (n2) and one unschedulable node
// (n3), then checks how many pods evictPodsFromTargetNodes reports as evicted.
// TODO: Make this table driven.
func TestLowNodeUtilizationWithPriorities(t *testing.T) {
	thresholds := api.ResourceThresholds{
		v1.ResourceCPU:  30,
		v1.ResourcePods: 30,
	}
	targetThresholds := api.ResourceThresholds{
		v1.ResourceCPU:  50,
		v1.ResourcePods: 50,
	}
	lowPrio := int32(0)
	highPrio := int32(10000)

	n1 := test.BuildTestNode("n1", 4000, 3000, 9)
	n2 := test.BuildTestNode("n2", 4000, 3000, 10)
	n3 := test.BuildTestNode("n3", 4000, 3000, 10)
	// n3 is marked unschedulable so that it is not counted among the under-utilized nodes.
	n3.Spec.Unschedulable = true

	// podOnN1 builds a pod on n1 with the given priority.
	podOnN1 := func(name string, priority *int32) *v1.Pod {
		pod := test.BuildTestPod(name, 400, 0, n1.Name)
		pod.Spec.Priority = priority
		return pod
	}
	p1 := podOnN1("p1", &highPrio)
	p2 := podOnN1("p2", &highPrio)
	p3 := podOnN1("p3", &highPrio)
	p4 := podOnN1("p4", &highPrio)
	p5 := podOnN1("p5", &lowPrio)
	// p6, p7 and p8 are set up below so that they are not expected to be evicted.
	p6 := podOnN1("p6", &highPrio)
	p7 := podOnN1("p7", &lowPrio)
	p8 := podOnN1("p8", &lowPrio)

	// p1-p5 are owned by a replica set.
	for _, pod := range []*v1.Pod{p1, p2, p3, p4, p5} {
		pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
	}

	// p6 is owned by a daemonset.
	p6.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()

	// p7 is a normal pod with local storage that also carries the mirror pod annotation.
	p7.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
	localStorage := v1.Volume{
		Name: "sample",
		VolumeSource: v1.VolumeSource{
			HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
			EmptyDir: &v1.EmptyDirVolumeSource{
				SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
		},
	}
	p7.Spec.Volumes = []v1.Volume{localStorage}
	p7.Annotations = test.GetMirrorPodAnnotation()

	// p8 is a critical pod in kube-system.
	p8.Namespace = "kube-system"
	p8.Annotations = test.GetCriticalPodAnnotation()

	// p9 is the only pod on n2; its priority is left as built.
	p9 := test.BuildTestPod("p9", 400, 0, n1.Name)
	p9.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()

	fakeClient := &fake.Clientset{}
	// Pod listing is answered based on the node name found in the field selector.
	fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
		listAction := action.(core.ListAction)
		selector := listAction.GetListRestrictions().Fields.String()
		switch {
		case strings.Contains(selector, "n1"):
			return true, &v1.PodList{Items: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8}}, nil
		case strings.Contains(selector, "n2"):
			return true, &v1.PodList{Items: []v1.Pod{*p9}}, nil
		case strings.Contains(selector, "n3"):
			return true, &v1.PodList{Items: []v1.Pod{}}, nil
		}
		return true, nil, fmt.Errorf("Failed to list: %v", listAction)
	})
	// Node lookups return the matching test node by name.
	fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
		getAction := action.(core.GetAction)
		wanted := getAction.GetName()
		for _, node := range []*v1.Node{n1, n2, n3} {
			if node.Name == wanted {
				return true, node, nil
			}
		}
		return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
	})

	expectedPodsEvicted := 3
	npm := createNodePodsMap(fakeClient, []*v1.Node{n1, n2, n3})
	lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
	if len(lowNodes) != 1 {
		t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
	}

	npe := nodePodEvictedCount{n1: 0, n2: 0, n3: 0}
	podsEvicted := evictPodsFromTargetNodes(fakeClient, "v1", targetNodes, lowNodes, targetThresholds, false, 3, npe)
	if podsEvicted != expectedPodsEvicted {
		t.Errorf("Expected %#v pods to be evicted but %#v got evicted", expectedPodsEvicted, podsEvicted)
	}
}
// TestSortPodsByPriority sorts a mix of pods with low, high and nil priorities
// and checks that the high-priority pod with matching requests and limits (p4)
// ends up last.
func TestSortPodsByPriority(t *testing.T) {
	n1 := test.BuildTestNode("n1", 4000, 3000, 9)
	lowPrio := int32(0)
	highPrio := int32(10000)

	// setLimits gives the pod's first container CPU and memory limits of 400m / 100.
	setLimits := func(pod *v1.Pod) {
		limits := pod.Spec.Containers[0].Resources.Limits
		limits[v1.ResourceCPU] = *resource.NewMilliQuantity(400, resource.DecimalSI)
		limits[v1.ResourceMemory] = *resource.NewQuantity(100, resource.DecimalSI)
	}

	// Low priority.
	p1 := test.BuildTestPod("p1", 400, 0, n1.Name)
	p1.Spec.Priority = &lowPrio

	// High priority, BestEffort: no requests and no limits.
	p2 := test.BuildTestPod("p2", 400, 0, n1.Name)
	p2.Spec.Priority = &highPrio
	p2.Spec.Containers[0].Resources.Requests = nil
	p2.Spec.Containers[0].Resources.Limits = nil

	// High priority, Burstable.
	p3 := test.BuildTestPod("p3", 400, 0, n1.Name)
	p3.Spec.Priority = &highPrio

	// High priority, Guaranteed.
	p4 := test.BuildTestPod("p4", 400, 100, n1.Name)
	p4.Spec.Priority = &highPrio
	setLimits(p4)

	// Pods with nil priority: p5 as built, p6 with limits set.
	p5 := test.BuildTestPod("p5", 400, 100, n1.Name)
	p5.Spec.Priority = nil

	p6 := test.BuildTestPod("p6", 400, 100, n1.Name)
	setLimits(p6)
	p6.Spec.Priority = nil

	podList := []*v1.Pod{p4, p3, p2, p1, p6, p5}
	sortPodsBasedOnPriority(podList)

	last := podList[len(podList)-1]
	if !reflect.DeepEqual(last, p4) {
		t.Errorf("Expected last pod in sorted list to be %v which of highest priority and guaranteed but got %v", p4, last)
	}
}
func TestValidateThresholds(t *testing.T) {
tests := []struct {
name string

View File

@@ -37,6 +37,7 @@ func BuildTestPod(name string, cpu int64, memory int64, nodeName string) *v1.Pod
{
Resources: v1.ResourceRequirements{
Requests: v1.ResourceList{},
Limits: v1.ResourceList{},
},
},
},