Mirror of https://github.com/kubernetes-sigs/descheduler.git (synced 2026-01-26 21:31:18 +01:00)

Compare commits: `deschedule` ... `release-1.` (22 commits)
Commits:

- ec76672ec7
- 44f2be4f01
- 6400f08b1a
- 086962b765
- 4d841a2eef
- 76ab9e765a
- 78313651dc
- c6c3be6362
- d2b8f07876
- f959101622
- 870a518db4
- 60e41670af
- 37c277ca1f
- 0777837a13
- 5222378f5a
- 104786844a
- b58fcb4d77
- e40fab430c
- aaaa1c5ba6
- e7d932ff4e
- e9beffd28d
- 6bdfbb5b5f
46 .github/ISSUE_TEMPLATE/bug_report.md (vendored)
@@ -1,46 +0,0 @@
---
name: Bug report
about: Create a bug report to help improve descheduler
title: ''
labels: 'kind/bug'
assignees: ''

---

<!-- Please answer these questions before submitting your bug report. Thanks! -->

**What version of descheduler are you using?**

descheduler version:

**Does this issue reproduce with the latest release?**

**Which descheduler CLI options are you using?**

**Please provide a copy of your descheduler policy config file**

**What k8s version are you using (`kubectl version`)?**

<details><summary><code>kubectl version</code> Output</summary><br><pre>
$ kubectl version

</pre></details>

**What did you do?**

<!--
If possible, provide a recipe for reproducing the error.
A detailed sequence of steps describing what to do to observe the issue is good.
A complete runnable bash shell script is best.
-->

**What did you expect to see?**

**What did you see instead?**
26 .github/ISSUE_TEMPLATE/feature_request.md (vendored)
@@ -1,26 +0,0 @@
---
name: Feature request
about: Suggest an idea for descheduler
title: ''
labels: 'kind/feature'
assignees: ''

---

<!-- Please answer these questions before submitting your feature request. Thanks! -->

**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->

**Describe the solution you'd like**
<!-- A clear and concise description of what you want to happen. -->

**Describe alternatives you've considered**
<!-- A clear and concise description of any alternative solutions or features you've considered. -->

**What version of descheduler are you using?**

descheduler version:

**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->
18 .github/ISSUE_TEMPLATE/misc_request.md (vendored)
@@ -1,18 +0,0 @@
---
name: Miscellaneous
about: Not a bug and not a feature
title: ''
labels: ''
assignees: ''

---

<!--
Please do not use this to submit a bug report or feature request. Use the
bug report or feature request options instead.

Also, please consider posting in the Kubernetes Slack #sig-scheduling channel
instead of opening an issue if this is a support request.

Thanks!
-->
2 .gitignore (vendored)
@@ -3,5 +3,3 @@ _tmp/
vendordiff.patch
.idea/
*.code-workspace
.vscode/
kind
10 Dockerfile
@@ -11,19 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM golang:1.18.2
FROM golang:1.13.9

WORKDIR /go/src/sigs.k8s.io/descheduler
COPY . .
ARG ARCH
ARG VERSION
RUN VERSION=${VERSION} make build.$ARCH
RUN make

FROM scratch

MAINTAINER Kubernetes SIG Scheduling <kubernetes-sig-scheduling@googlegroups.com>

USER 1000
MAINTAINER Avesh Agarwal <avesh.ncsu@gmail.com>

COPY --from=0 /go/src/sigs.k8s.io/descheduler/_output/bin/descheduler /bin/descheduler

@@ -13,9 +13,7 @@
# limitations under the License.
FROM scratch

MAINTAINER Kubernetes SIG Scheduling <kubernetes-sig-scheduling@googlegroups.com>

USER 1000
MAINTAINER Avesh Agarwal <avesh.ncsu@gmail.com>

COPY _output/bin/descheduler /bin/descheduler
84 Makefile
@@ -14,18 +14,16 @@

.PHONY: test

# VERSION is based on a date stamp plus the last commit
VERSION?=v$(shell date +%Y%m%d)-$(shell git describe --tags --match "v*")
BRANCH?=$(shell git branch --show-current)
SHA1?=$(shell git rev-parse HEAD)
# VERSION is currently based on the last commit
VERSION?=$(shell git describe --tags)
COMMIT=$(shell git rev-parse HEAD)
BUILD=$(shell date +%FT%T%z)
LDFLAG_LOCATION=sigs.k8s.io/descheduler/pkg/version
ARCHS = amd64 arm arm64
LDFLAG_LOCATION=sigs.k8s.io/descheduler/cmd/descheduler/app

LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitbranch=${BRANCH} -X ${LDFLAG_LOCATION}.gitsha1=${SHA1}"
LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitCommit=${COMMIT}"

GOLANGCI_VERSION := v1.46.1
HAS_GOLANGCI := $(shell ls _output/bin/golangci-lint 2> /dev/null)
GOLANGCI_VERSION := v1.15.0
HAS_GOLANGCI := $(shell which golangci-lint)

# REGISTRY is the container registry to push
# into. The default is to push to the staging
@@ -43,65 +41,30 @@ IMAGE_GCLOUD:=$(REGISTRY)/descheduler:$(VERSION)
# In the future binaries can be uploaded to
# GCS bucket gs://k8s-staging-descheduler.

HAS_HELM := $(shell which helm 2> /dev/null)
HAS_HELM := $(shell which helm)

all: build

build:
    CGO_ENABLED=0 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler

build.amd64:
    CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler

build.arm:
    CGO_ENABLED=0 GOOS=linux GOARCH=arm GOARM=7 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler

build.arm64:
    CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler

dev-image: build
    docker build -f Dockerfile.dev -t $(IMAGE) .

image:
    docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="amd64" -t $(IMAGE) .
    docker build -t $(IMAGE) .

image.amd64:
    docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="amd64" -t $(IMAGE)-amd64 .

image.arm:
    docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="arm" -t $(IMAGE)-arm .

image.arm64:
    docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="arm64" -t $(IMAGE)-arm64 .

push: image
push-container-to-gcloud: image
    gcloud auth configure-docker
    docker tag $(IMAGE) $(IMAGE_GCLOUD)
    docker push $(IMAGE_GCLOUD)

push-all: image.amd64 image.arm image.arm64
    gcloud auth configure-docker
    for arch in $(ARCHS); do \
        docker tag $(IMAGE)-$${arch} $(IMAGE_GCLOUD)-$${arch} ;\
        docker push $(IMAGE_GCLOUD)-$${arch} ;\
    done
    DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create $(IMAGE_GCLOUD) $(addprefix --amend $(IMAGE_GCLOUD)-, $(ARCHS))
    for arch in $(ARCHS); do \
        DOCKER_CLI_EXPERIMENTAL=enabled docker manifest annotate --arch $${arch} $(IMAGE_GCLOUD) $(IMAGE_GCLOUD)-$${arch} ;\
    done
    DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push $(IMAGE_GCLOUD) ;\
push: push-container-to-gcloud

clean:
    rm -rf _output
    rm -rf _tmp

verify: verify-govet verify-spelling verify-gofmt verify-vendor lint lint-chart verify-toc verify-gen

verify-govet:
    ./hack/verify-govet.sh

verify-spelling:
    ./hack/verify-spelling.sh
verify: verify-gofmt verify-vendor lint lint-chart

verify-gofmt:
    ./hack/verify-gofmt.sh
@@ -109,9 +72,6 @@ verify-gofmt:
verify-vendor:
    ./hack/verify-vendor.sh

verify-toc:
    ./hack/verify-toc.sh

test-unit:
    ./test/run-unit-tests.sh

@@ -122,26 +82,16 @@ gen:
    ./hack/update-generated-conversions.sh
    ./hack/update-generated-deep-copies.sh
    ./hack/update-generated-defaulters.sh
    ./hack/update-toc.sh

verify-gen:
    ./hack/verify-conversions.sh
    ./hack/verify-deep-copies.sh
    ./hack/verify-defaulters.sh

    #undo go mod changes caused by above.
    go mod tidy
lint:
ifndef HAS_GOLANGCI
    curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b ./_output/bin ${GOLANGCI_VERSION}
    curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b ./_output/bin ${GOLANGCI_VERSION}
endif
    ./_output/bin/golangci-lint run

lint-chart: ensure-helm-install
    helm lint ./charts/descheduler

test-helm: ensure-helm-install
    ./test/run-helm-tests.sh

ensure-helm-install:
lint-chart:
ifndef HAS_HELM
    curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 && chmod 700 ./get_helm.sh && ./get_helm.sh
endif
    helm lint ./charts/descheduler
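As a rough sketch of how the targets above might be strung together for a single-architecture build (the version string is illustrative; `VERSION` otherwise defaults to the git-derived value defined at the top of the Makefile):

```shell
# Illustrative: produce a linux/amd64 binary, then a container image tagged with VERSION
make build.amd64 VERSION=v0.24.1
make image.amd64 VERSION=v0.24.1
```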
13 OWNERS
@@ -1,18 +1,11 @@
approvers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla
- damemi
- ingvagabund
- seanmalloy
reviewers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla
- damemi
- seanmalloy
- ingvagabund
- lixiang233
- a7i
- janeliul
emeritus_approvers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla
695 README.md
@@ -1,8 +1,9 @@
[](https://goreportcard.com/report/sigs.k8s.io/descheduler)

# Descheduler for Kubernetes

## Introduction

Scheduling in Kubernetes is the process of binding pending pods to nodes, and is performed by
a component of Kubernetes called kube-scheduler. The scheduler's decisions, whether or where a
pod can or can not be scheduled, are guided by its configurable policy which comprises of set of
@@ -22,47 +23,9 @@ Descheduler, based on its policy, finds pods that can be moved and evicts them.
note, in current implementation, descheduler does not schedule replacement of evicted pods
but relies on the default scheduler for that.

Table of Contents
=================
<!-- toc -->
- [Quick Start](#quick-start)
  - [Run As A Job](#run-as-a-job)
  - [Run As A CronJob](#run-as-a-cronjob)
  - [Run As A Deployment](#run-as-a-deployment)
  - [Install Using Helm](#install-using-helm)
  - [Install Using Kustomize](#install-using-kustomize)
- [User Guide](#user-guide)
- [Policy and Strategies](#policy-and-strategies)
  - [RemoveDuplicates](#removeduplicates)
  - [LowNodeUtilization](#lownodeutilization)
  - [HighNodeUtilization](#highnodeutilization)
  - [RemovePodsViolatingInterPodAntiAffinity](#removepodsviolatinginterpodantiaffinity)
  - [RemovePodsViolatingNodeAffinity](#removepodsviolatingnodeaffinity)
  - [RemovePodsViolatingNodeTaints](#removepodsviolatingnodetaints)
  - [RemovePodsViolatingTopologySpreadConstraint](#removepodsviolatingtopologyspreadconstraint)
  - [RemovePodsHavingTooManyRestarts](#removepodshavingtoomanyrestarts)
  - [PodLifeTime](#podlifetime)
  - [RemoveFailedPods](#removefailedpods)
- [Filter Pods](#filter-pods)
  - [Namespace filtering](#namespace-filtering)
  - [Priority filtering](#priority-filtering)
  - [Label filtering](#label-filtering)
  - [Node Fit filtering](#node-fit-filtering)
- [Pod Evictions](#pod-evictions)
  - [Pod Disruption Budget (PDB)](#pod-disruption-budget-pdb)
- [High Availability](#high-availability)
  - [Configure HA Mode](#configure-ha-mode)
- [Metrics](#metrics)
- [Compatibility Matrix](#compatibility-matrix)
- [Getting Involved and Contributing](#getting-involved-and-contributing)
  - [Communicating With Contributors](#communicating-with-contributors)
- [Roadmap](#roadmap)
- [Code of conduct](#code-of-conduct)
<!-- /toc -->

## Quick Start

The descheduler can be run as a `Job`, `CronJob`, or `Deployment` inside of a k8s cluster. It has the
The descheduler can be run as a Job or CronJob inside of a k8s cluster. It has the
advantage of being able to be run multiple times without needing user intervention.
The descheduler pod is run as a critical pod in the `kube-system` namespace to avoid
being evicted by itself or by the kubelet.
@@ -70,52 +33,17 @@ being evicted by itself or by the kubelet.

### Run As A Job

```
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/job/job.yaml
kubectl create -f kubernetes/rbac.yaml
kubectl create -f kubernetes/configmap.yaml
kubectl create -f kubernetes/job.yaml
```

### Run As A CronJob

```
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/cronjob/cronjob.yaml
```

### Run As A Deployment

```
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/deployment/deployment.yaml
```

### Install Using Helm

Starting with release v0.18.0 there is an official helm chart that can be used to install the
descheduler. See the [helm chart README](https://github.com/kubernetes-sigs/descheduler/blob/master/charts/descheduler/README.md) for detailed instructions.

The descheduler helm chart is also listed on the [artifact hub](https://artifacthub.io/packages/helm/descheduler/descheduler).

### Install Using Kustomize

You can use kustomize to install descheduler.
See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/kustomize/resource/) for detailed instructions.

Run As A Job
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.24.1' | kubectl apply -f -
```

Run As A CronJob
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.24.1' | kubectl apply -f -
```

Run As A Deployment
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.24.1' | kubectl apply -f -
kubectl create -f kubernetes/rbac.yaml
kubectl create -f kubernetes/configmap.yaml
kubectl create -f kubernetes/cronjob.yaml
```
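After applying one of the manifests above, an illustrative way to confirm a run is to inspect the created object and its logs (the object name and namespace come from whichever manifest you applied):

```shell
# Illustrative: check the descheduler Job and read its pod logs
kubectl get jobs -n kube-system
kubectl logs -n kube-system job/<descheduler-job-name>
```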
## User Guide

@@ -124,68 +52,25 @@ See the [user guide](docs/user-guide.md) in the `/docs` directory.

## Policy and Strategies

Descheduler's policy is configurable and includes strategies that can be enabled or disabled. By default, all strategies are enabled.

The policy includes a common configuration that applies to all the strategies:

| Name | Default Value | Description |
|------|---------------|-------------|
| `nodeSelector` | `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` | `false` | allows eviction of pods with local storage |
| `evictSystemCriticalPods` | `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
| `ignorePvcPods` | `false` | set whether PVC pods should be evicted or ignored |
| `maxNoOfPodsToEvictPerNode` | `nil` | maximum number of pods evicted from each node (summed through all strategies) |
| `maxNoOfPodsToEvictPerNamespace` | `nil` | maximum number of pods evicted from each namespace (summed through all strategies) |
| `evictFailedBarePods` | `false` | allow eviction of pods without owner references and in failed phase |

As part of the policy, the parameters associated with each strategy can be configured.
See each strategy for details on available parameters.

**Policy:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
nodeSelector: prod=dev
evictFailedBarePods: false
evictLocalStoragePods: true
evictSystemCriticalPods: true
maxNoOfPodsToEvictPerNode: 40
ignorePvcPods: false
strategies:
  ...
```

The following diagram provides a visualization of most of the strategies to help
categorize how strategies fit together.

Descheduler's policy is configurable and includes strategies that can be enabled or disabled.
Seven strategies `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`,
`RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints`, `RemovePodsHavingTooManyRestarts`, and `PodLifeTime`
are currently implemented. As part of the policy, the parameters associated with the strategies can be configured too.
By default, all strategies are enabled.

### RemoveDuplicates

This strategy makes sure that there is only one pod associated with a ReplicaSet (RS),
ReplicationController (RC), StatefulSet, or Job running on the same node. If there are more,
This strategy makes sure that there is only one pod associated with a Replica Set (RS),
Replication Controller (RC), Deployment, or Job running on the same node. If there are more,
those duplicate pods are evicted for better spreading of pods in a cluster. This issue could happen
if some nodes went down due to whatever reasons, and pods on them were moved to other nodes leading to
more than one pod associated with a RS or RC, for example, running on the same node. Once the failed nodes
are ready again, this strategy could be enabled to evict those duplicate pods.

It provides one optional parameter, `excludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction. Note that
pods created by Deployments are considered for eviction by this strategy. The `excludeOwnerKinds` parameter
should include `ReplicaSet` to have pods created by Deployments excluded.
It provides one optional parameter, `ExcludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.

**Parameters:**

|Name|Type|
|---|---|
|`excludeOwnerKinds`|list(string)|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -204,49 +89,19 @@ in the hope that recreation of evicted pods will be scheduled on these underutil
parameters of this strategy are configured under `nodeResourceUtilizationThresholds`.

The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage (the percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node).

If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
`thresholds` can be configured for cpu, memory, and number of pods in terms of percentage. If a node's
usage is below threshold for all (cpu, memory, and number of pods), the node is considered underutilized.
Currently, pods request resource requirements are considered for computing node resource utilization.

There is another configurable threshold, `targetThresholds`, that is used to compute those potential nodes
from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, number of pods, or extended resources),
the node is considered over utilized. Any node between the thresholds, `thresholds` and `targetThresholds` is
from where pods could be evicted. Any node, between the thresholds, `thresholds` and `targetThresholds` is
considered appropriately utilized and is not considered for eviction. The threshold, `targetThresholds`,
can be configured for cpu, memory, and number of pods too in terms of percentage.

These thresholds, `thresholds` and `targetThresholds`, could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `overutilized nodes` (those with usage above `targetThresholds`) to `underutilized nodes`
(those with usage below `thresholds`), it will abort if any number of `underutilized nodes` or `overutilized nodes` is zero.
These thresholds, `thresholds` and `targetThresholds`, could be tuned as per your cluster requirements.
Here is an example of a policy for this strategy:

Additionally, the strategy accepts a `useDeviationThresholds` parameter.
If that parameter is set to `true`, the thresholds are considered as percentage deviations from mean resource usage.
`thresholds` will be deducted from the mean among all nodes and `targetThresholds` will be added to the mean.
A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization).

**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage.
This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same
design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands
like `kubectl top`) may differ from the calculated consumption, due to these components reporting
actual usage metrics. Implementing metrics-based descheduling is currently TODO for the project.

**Parameters:**

|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`targetThresholds`|map(string:int)|
|`numberOfNodes`|int|
|`useDeviationThresholds`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -264,111 +119,26 @@ strategies:
          "pods": 50
```

Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`.
  If any of these resource types is not specified, all its thresholds default to 100% to avoid nodes going from underutilized to overutilized.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional,
  and will not be used to compute node's usage if it's not specified in `thresholds` and `targetThresholds` explicitly.
* `thresholds` or `targetThresholds` can not be nil and they must configure exactly the same types of resources.
* The valid range of the resource's percentage value is \[0, 100\]
* Percentage value of `thresholds` can not be greater than `targetThresholds` for the same resource.

There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
are above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
### HighNodeUtilization

This strategy finds nodes that are under utilized and evicts pods from the nodes in the hope that these pods will be
scheduled compactly into fewer nodes. Used in conjunction with node auto-scaling, this strategy is intended to help
trigger down scaling of under utilized nodes.
This strategy **must** be used with the scheduler scoring strategy `MostAllocated`. The parameters of this strategy are
configured under `nodeResourceUtilizationThresholds`.

The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage. The percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node.

If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
Currently, pods request resource requirements are considered for computing node resource utilization.
Any node above `thresholds` is considered appropriately utilized and is not considered for eviction.

The `thresholds` param could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `underutilized nodes` (those with usage below `thresholds`)
so that they can be recreated in appropriately utilized nodes.
The strategy will abort if any number of `underutilized nodes` or `appropriately utilized nodes` is zero.

**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage.
This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same
design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands
like `kubectl top`) may differ from the calculated consumption, due to these components reporting
actual usage metrics. Implementing metrics-based descheduling is currently TODO for the project.

**Parameters:**

|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`numberOfNodes`|int|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "HighNodeUtilization":
    enabled: true
    params:
      nodeResourceUtilizationThresholds:
        thresholds:
          "cpu" : 20
          "memory": 20
          "pods": 20
```

Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`. If any of these resource types is not specified, all its thresholds default to 100%.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional, and will not be used to compute node's usage if it's not specified in `thresholds` explicitly.
* `thresholds` can not be nil.
* The valid range of the resource's percentage value is \[0, 100\]

There is another parameter associated with the `HighNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
is above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
### RemovePodsViolatingInterPodAntiAffinity

This strategy makes sure that pods violating interpod anti-affinity are removed from nodes. For example,
if there is podA on a node and podB and podC (running on the same node) have anti-affinity rules which prohibit
them to run on the same node, then podA will be evicted from the node so that podB and podC could run. This
issue could happen, when the anti-affinity rules for podB and podC are created when they are already running on
node.
node. Currently, there are no parameters associated with this strategy. To disable this strategy, the
policy should look like:

**Parameters:**

|Name|Type|
|---|---|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemovePodsViolatingInterPodAntiAffinity":
    enabled: true
    enabled: false
```

### RemovePodsViolatingNodeAffinity

@@ -389,20 +159,9 @@ of scheduling. Over time nodeA stops to satisfy the rule. When the strategy gets
executed and there is another node available that satisfies the node affinity rule,
podA gets evicted from nodeA.

**Parameters:**
The policy file should look like:

|Name|Type|
|---|---|
|`nodeAffinityType`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -418,97 +177,21 @@ strategies:

This strategy makes sure that pods violating NoSchedule taints on nodes are removed. For example there is a
pod "podA" with a toleration to tolerate a taint ``key=value:NoSchedule`` scheduled and running on the tainted
node. If the node's taint is subsequently updated/removed, taint is no longer satisfied by its pods' tolerations
and will be evicted.
and will be evicted. The policy file should look like:

Node taints can be excluded from consideration by specifying a list of excludedTaints. If a node taint key **or**
key=value matches an excludedTaints entry, the taint will be ignored.

For example, excludedTaints entry "dedicated" would match all taints with key "dedicated", regardless of value.
excludedTaints entry "dedicated=special-user" would match taints with key "dedicated" and value "special-user".

**Parameters:**

|Name|Type|
|---|---|
|`excludedTaints`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**

````yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemovePodsViolatingNodeTaints":
    enabled: true
    params:
      excludedTaints:
      - dedicated=special-user # exclude taints with key "dedicated" and value "special-user"
      - reserved # exclude all taints with key "reserved"
````

### RemovePodsViolatingTopologySpreadConstraint

This strategy makes sure that pods violating [topology spread constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/)
are evicted from nodes. Specifically, it tries to evict the minimum number of pods required to balance topology domains to within each constraint's `maxSkew`.
This strategy requires k8s version 1.18 at a minimum.

By default, this strategy only deals with hard constraints, setting parameter `includeSoftConstraints` to `true` will
include soft constraints.

Strategy parameter `labelSelector` is not utilized when balancing topology domains and is only applied during eviction to determine if the pod can be evicted.

**Parameters:**

|Name|Type|
|---|---|
|`includeSoftConstraints`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemovePodsViolatingTopologySpreadConstraint":
    enabled: true
    params:
      includeSoftConstraints: false
```
### RemovePodsHavingTooManyRestarts

This strategy makes sure that pods having too many restarts are removed from nodes. For example a pod with EBS/PD that
can't get the volume/disk attached to the instance, then the pod should be re-scheduled to other nodes. Its parameters
include `podRestartThreshold`, which is the number of restarts (summed over all eligible containers) at which a pod
should be evicted, and `includingInitContainers`, which determines whether init container restarts should be factored
into that calculation.
This strategy makes sure that pods having too many restarts are removed from nodes. For example a pod with EBS/PD that can't get the volume/disk attached to the instance, then the pod should be re-scheduled to other nodes.

**Parameters:**

|Name|Type|
|---|---|
|`podRestartThreshold`|int|
|`includingInitContainers`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -522,305 +205,38 @@ strategies:

### PodLifeTime

This strategy evicts pods that are older than `maxPodLifeTimeSeconds`.
This strategy evicts pods that are older than `.strategies.PodLifeTime.params.maxPodLifeTimeSeconds` The policy
file should look like:

You can also specify `podStatusPhases` to `only` evict pods with specific `StatusPhases`, currently this parameter is limited
to `Running` and `Pending`.

**Parameters:**

|Name|Type|
|---|---|
|`maxPodLifeTimeSeconds`|int|
|`podStatusPhases`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "PodLifeTime":
    enabled: true
    params:
      podLifeTime:
        maxPodLifeTimeSeconds: 86400
        podStatusPhases:
        - "Pending"
```
### RemoveFailedPods

This strategy evicts pods that are in failed status phase.
You can provide an optional parameter to filter by failed `reasons`.
`reasons` can be expanded to include reasons of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
You can specify an optional parameter `minPodLifetimeSeconds` to evict pods that are older than specified seconds.
Lastly, you can specify the optional parameter `excludeOwnerKinds` and if a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.

**Parameters:**

|Name|Type|
|---|---|
|`minPodLifetimeSeconds`|uint|
|`excludeOwnerKinds`|list(string)|
|`reasons`|list(string)|
|`includingInitContainers`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|

**Example:**

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemoveFailedPods":
    enabled: true
    params:
      failedPods:
        reasons:
        - "NodeAffinity"
        includingInitContainers: true
        excludeOwnerKinds:
        - "Job"
        minPodLifetimeSeconds: 3600
```

## Filter Pods

### Namespace filtering

The following strategies accept a `namespaces` parameter which allows to specify a list of including, resp. excluding namespaces:
* `PodLifeTime`
* `RemovePodsHavingTooManyRestarts`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemoveDuplicates`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemoveFailedPods`

For example:

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "PodLifeTime":
    enabled: true
    params:
      podLifeTime:
        maxPodLifeTimeSeconds: 86400
      namespaces:
        include:
        - "namespace1"
        - "namespace2"
```

In the examples `PodLifeTime` gets executed only over `namespace1` and `namespace2`.
The similar holds for `exclude` field:

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "PodLifeTime":
    enabled: true
    params:
      podLifeTime:
        maxPodLifeTimeSeconds: 86400
      namespaces:
        exclude:
        - "namespace1"
        - "namespace2"
```

The strategy gets executed over all namespaces but `namespace1` and `namespace2`.

It's not allowed to combine `include` with `exclude` field.
### Priority filtering

All strategies are able to configure a priority threshold, only pods under the threshold can be evicted. You can
specify this threshold by setting `thresholdPriorityClassName`(setting the threshold to the value of the given
priority class) or `thresholdPriority`(directly setting the threshold) parameters. By default, this threshold
is set to the value of `system-cluster-critical` priority class.

Note: Setting `evictSystemCriticalPods` to true disables priority filtering entirely.

E.g.

Setting `thresholdPriority`
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "PodLifeTime":
    enabled: true
    params:
      podLifeTime:
        maxPodLifeTimeSeconds: 86400
      thresholdPriority: 10000
```

Setting `thresholdPriorityClassName`
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "PodLifeTime":
    enabled: true
    params:
      podLifeTime:
        maxPodLifeTimeSeconds: 86400
      thresholdPriorityClassName: "priorityclass1"
```

Note that you can't configure both `thresholdPriority` and `thresholdPriorityClassName`, if the given priority class
does not exist, descheduler won't create it and will throw an error.
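Since the descheduler will not create the class for you, a referenced priority class has to exist before the policy runs; a minimal sketch (the class name and value are illustrative and match the example above):

```shell
# Illustrative: create the priority class referenced by thresholdPriorityClassName up front
kubectl create priorityclass priorityclass1 --value=10000 --description="descheduler eviction threshold"
```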
### Label filtering

The following strategies can configure a [standard kubernetes labelSelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.24/#labelselector-v1-meta)
to filter pods by their labels:

* `PodLifeTime`
* `RemovePodsHavingTooManyRestarts`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemoveFailedPods`

This allows running strategies among pods the descheduler is interested in.

For example:

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "PodLifeTime":
    enabled: true
    params:
      podLifeTime:
        maxPodLifeTimeSeconds: 86400
      labelSelector:
        matchLabels:
          component: redis
        matchExpressions:
          - {key: tier, operator: In, values: [cache]}
          - {key: environment, operator: NotIn, values: [dev]}
```
### Node Fit filtering

The following strategies accept a `nodeFit` boolean parameter which can optimize descheduling:
* `RemoveDuplicates`
* `LowNodeUtilization`
* `HighNodeUtilization`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemovePodsHavingTooManyRestarts`
* `RemoveFailedPods`

If set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. If a pod cannot be rescheduled to another node, it will not be evicted. Currently the following criteria are considered when setting `nodeFit` to `true`:
- A `nodeSelector` on the pod
- Any `tolerations` on the pod and any `taints` on the other nodes
- `nodeAffinity` on the pod
- Resource `requests` made by the pod and the resources available on other nodes
- Whether any of the other nodes are marked as `unschedulable`

E.g.

```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "LowNodeUtilization":
    enabled: true
    params:
      nodeResourceUtilizationThresholds:
        thresholds:
          "cpu": 20
          "memory": 20
          "pods": 20
        targetThresholds:
          "cpu": 50
          "memory": 50
          "pods": 50
      nodeFit: true
```

Note that node fit filtering references the current pod spec, and not that of it's owner.
Thus, if the pod is owned by a ReplicationController (and that ReplicationController was modified recently),
the pod may be running with an outdated spec, which the descheduler will reference when determining node fit.
This is expected behavior as the descheduler is a "best-effort" mechanism.

Using Deployments instead of ReplicationControllers provides an automated rollout of pod spec changes, therefore ensuring that the descheduler has an up-to-date view of the cluster state.
## Pod Evictions

When the descheduler decides to evict pods from a node, it employs the following general mechanism:

* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted (unless `evictSystemCriticalPods: true` is set).
* Pods (static or mirrored pods or standalone pods) not part of an ReplicationController, ReplicaSet(Deployment), StatefulSet, or Job are
  never evicted because these pods won't be recreated. (Standalone pods in failed status phase can be evicted by setting `evictFailedBarePods: true`)
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted.
* Pods (static or mirrored pods or stand alone pods) not part of an RC, RS, Deployment or Job are
  never evicted because these pods won't be recreated.
* Pods associated with DaemonSets are never evicted.
* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set).
* Pods with PVCs are evicted (unless `ignorePvcPods: true` is set).
* In `LowNodeUtilization` and `RemovePodsViolatingInterPodAntiAffinity`, pods are evicted by their priority from low to high, and if they have same priority,
  best effort pods are evicted before burstable and guaranteed pods.
* All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This
* Pods with local storage are never evicted.
* Best efforts pods are evicted before burstable and guaranteed pods.
* All types of pods with the annotation descheduler.alpha.kubernetes.io/evict are evicted. This
  annotation is used to override checks which prevent eviction and users can select which pod is evicted.
  Users should know how and if the pod will be recreated.
* Pods with a non-nil DeletionTimestamp are not evicted by default.

Setting `--v=4` or greater on the Descheduler will log all reasons why any pod is not evictable.
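As a rough sketch of how the override annotation mentioned above can be applied to a single pod (pod name, namespace, and the annotation value shown are illustrative; the checks key off the annotation itself):

```shell
# Illustrative: mark one pod as always eligible for descheduler eviction
kubectl annotate pod <pod-name> -n <namespace> descheduler.alpha.kubernetes.io/evict="true"
```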
### Pod Disruption Budget (PDB)

Pods subject to a Pod Disruption Budget(PDB) are not evicted if descheduling violates its PDB. The pods
are evicted by using the eviction subresource to handle PDB.
## High Availability

In High Availability mode, Descheduler starts [leader election](https://github.com/kubernetes/client-go/tree/master/tools/leaderelection) process in Kubernetes. You can activate HA mode
if you choose to deploy your application as Deployment.

Deployment starts with 1 replica by default. If you want to use more than 1 replica, you must consider
enable High Availability mode since we don't want to run descheduler pods simultaneously.

### Configure HA Mode

The leader election process can be enabled by setting `--leader-elect` in the CLI. You can also set
`--set=leaderElection.enabled=true` flag if you are using Helm.

To get best results from HA mode some additional configurations might require:
* Configure a [podAntiAffinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) rule if you want to schedule onto a node only if that node is in the same zone as at least one already-running descheduler
* Set the replica count greater than 1
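Putting those pieces together, a minimal sketch of an HA install via Helm (release name is illustrative; `kind`, `replicas`, and `leaderElection` are the chart values listed in the chart configuration table):

```shell
# Illustrative: deploy the descheduler as a 2-replica Deployment with leader election enabled
helm install descheduler --namespace kube-system descheduler/descheduler \
  --set kind=Deployment \
  --set replicas=2 \
  --set leaderElection.enabled=true
```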
## Metrics

| name | type | description |
|-------|-------|----------------|
| build_info | gauge | constant 1 |
| pods_evicted | CounterVec | total number of pods evicted |

The metrics are served through https://localhost:10258/metrics by default.
The address and port can be changed by setting `--binding-address` and `--secure-port` flags.
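An illustrative way to look at these metrics from a workstation is to port-forward to a running descheduler pod and scrape the endpoint (the serving certificate is typically self-signed, hence `-k`; pod name and namespace depend on your deployment):

```shell
# Illustrative: forward the secure metrics port and fetch the endpoint
kubectl -n kube-system port-forward <descheduler-pod> 10258:10258 &
curl -k https://localhost:10258/metrics
```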
## Compatibility Matrix
The below compatibility matrix shows the k8s client package(client-go, apimachinery, etc) versions that descheduler
is compiled with. At this time descheduler does not have a hard dependency to a specific k8s release. However a
@@ -830,18 +246,13 @@ v0.18 should work with k8s v1.18, v1.17, and v1.16.
Starting with descheduler release v0.18 the minor version of descheduler matches the minor version of the k8s client
packages that it is compiled with.

| Descheduler | Supported Kubernetes Version |
|-------------|------------------------------|
| v0.24 | v1.24 |
| v0.23 | v1.23 |
| v0.22 | v1.22 |
| v0.21 | v1.21 |
| v0.20 | v1.20 |
| v0.19 | v1.19 |
| v0.18 | v1.18 |
| v0.10 | v1.17 |
| v0.4-v0.9 | v1.9+ |
| v0.1-v0.3 | v1.7-v1.8 |
Descheduler | Supported Kubernetes Version
-------------|-----------------------------
v0.18 | v1.18
v0.10 | v1.17
v0.4-v0.9 | v1.9+
v0.1-v0.3 | v1.7-v1.8

## Getting Involved and Contributing
@@ -1,16 +1,16 @@
apiVersion: v1
name: descheduler
version: 0.24.1
appVersion: 0.24.1
version: 0.18.2
appVersion: 0.18.0
description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that.
keywords:
- kubernetes
- descheduler
- kube-scheduler
- kubernetes
- descheduler
- kube-scheduler
home: https://github.com/kubernetes-sigs/descheduler
icon: https://kubernetes.io/images/favicon.png
sources:
- https://github.com/kubernetes-sigs/descheduler
- https://github.com/kubernetes-sigs/descheduler
maintainers:
- name: Kubernetes SIG Scheduling
  email: kubernetes-sig-scheduling@googlegroups.com
- name: Kubernetes SIG Scheduling
  email: kubernetes-sig-scheduling@googlegroups.com
@@ -6,12 +6,12 @@

```shell
helm repo add descheduler https://kubernetes-sigs.github.io/descheduler/
helm install my-release --namespace kube-system descheduler/descheduler
$ helm install descheduler/descheduler --name my-release
```

## Introduction

This chart bootstraps a [descheduler](https://github.com/kubernetes-sigs/descheduler/) cron job on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
This chart bootstraps a [desheduler](https://github.com/kubernetes-sigs/descheduler/) cron job on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.

## Prerequisites

@@ -22,7 +22,7 @@ This chart bootstraps a [descheduler](https://github.com/kubernetes-sigs/desched

To install the chart with the release name `my-release`:

```shell
helm install --namespace kube-system my-release descheduler/descheduler
helm install --name my-release descheduler/descheduler
```

The command deploys _descheduler_ on the Kubernetes cluster in the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation.
@@ -43,46 +43,17 @@ The command removes all the Kubernetes components associated with the chart and
The following table lists the configurable parameters of the _descheduler_ chart and their default values.
|
||||
|
||||
| Parameter | Description | Default |
|
||||
|-------------------------------------|-----------------------------------------------------------------------------------------------------------------------|--------------------------------------|
|
||||
| `kind` | Use as CronJob or Deployment | `CronJob` |
|
||||
| `image.repository` | Docker repository to use | `k8s.gcr.io/descheduler/descheduler` |
|
||||
| `image.tag` | Docker tag to use | `v[chart appVersion]` |
|
||||
| `image.pullPolicy` | Docker image pull policy | `IfNotPresent` |
|
||||
| `imagePullSecrets` | Docker repository secrets | `[]` |
|
||||
| `nameOverride` | String to partially override `descheduler.fullname` template (will prepend the release name) | `""` |
|
||||
| `fullnameOverride` | String to fully override `descheduler.fullname` template | `""` |
|
||||
| `cronJobApiVersion` | CronJob API Group Version | `"batch/v1"` |
|
||||
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
|
||||
| `startingDeadlineSeconds` | If set, configure `startingDeadlineSeconds` for the _descheduler_ job | `nil` |
|
||||
| `successfulJobsHistoryLimit` | If set, configure `successfulJobsHistoryLimit` for the _descheduler_ job | `nil` |
|
||||
| `failedJobsHistoryLimit` | If set, configure `failedJobsHistoryLimit` for the _descheduler_ job | `nil` |
|
||||
| `deschedulingInterval` | If using kind:Deployment, sets time between consecutive descheduler executions. | `5m` |
|
||||
| `replicas` | The replica count for Deployment | `1` |
|
||||
| `leaderElection` | The options for high availability when running replicated components | _see values.yaml_ |
|
||||
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
|
||||
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
|
||||
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
|
||||
| `rbac.create` | If `true`, create & use RBAC resources | `true` |
|
||||
| `podSecurityPolicy.create` | If `true`, create PodSecurityPolicy | `true` |
|
||||
| `resources` | Descheduler container CPU and memory requests/limits | _see values.yaml_ |
|
||||
| `serviceAccount.create` | If `true`, create a service account for the cron job | `true` |
|
||||
| `serviceAccount.name`               | The name of the service account to use; if not set and `create` is true, a name is generated using the fullname template | `nil`                               |
|
||||
| `serviceAccount.annotations` | Specifies custom annotations for the serviceAccount | `{}` |
|
||||
| `podAnnotations` | Annotations to add to the descheduler Pods | `{}` |
|
||||
| `podLabels` | Labels to add to the descheduler Pods | `{}` |
|
||||
| `nodeSelector` | Node selectors to run the descheduler cronjob/deployment on specific nodes | `nil` |
|
||||
| `service.enabled` | If `true`, create a service for deployment | `false` |
|
||||
| `serviceMonitor.enabled` | If `true`, create a ServiceMonitor for deployment | `false` |
|
||||
| `serviceMonitor.namespace` | The namespace where Prometheus expects to find service monitors | `nil` |
|
||||
| `serviceMonitor.interval` | The scrape interval. If not set, the Prometheus default scrape interval is used | `nil` |
|
||||
| `serviceMonitor.honorLabels`        | Keeps the scraped data's labels when they collide with target labels                                                   | `true`                                |
|
||||
| `serviceMonitor.insecureSkipVerify` | Skip TLS certificate validation when scraping | `true` |
|
||||
| `serviceMonitor.serverName` | Name of the server to use when validating TLS certificate | `nil` |
|
||||
| `serviceMonitor.metricRelabelings` | MetricRelabelConfigs to apply to samples after scraping, but before ingestion | `[]` |
|
||||
| `serviceMonitor.relabelings` | RelabelConfigs to apply to samples before scraping | `[]` |
|
||||
| `affinity` | Node affinity to run the descheduler cronjob/deployment on specific nodes | `nil` |
|
||||
| `tolerations`                       | Tolerations to run the descheduler cronjob/deployment on specific nodes                                                | `nil`                                 |
|
||||
| `suspend` | Set spec.suspend in descheduler cronjob | `false` |
|
||||
| `commonLabels` | Labels to apply to all resources | `{}` |
|
||||
| `livenessProbe` | Liveness probe configuration for the descheduler container | _see values.yaml_ |
|
||||
| Parameter | Description | Default |
|
||||
| ------------------------------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------ |
|
||||
| `image.repository` | Docker repository to use | `us.gcr.io/k8s-artifacts-prod/descheduler/descheduler` |
|
||||
| `image.tag` | Docker tag to use | `v[chart appVersion]` |
|
||||
| `image.pullPolicy` | Docker image pull policy | `IfNotPresent` |
|
||||
| `nameOverride` | String to partially override `descheduler.fullname` template (will prepend the release name) | `""` |
|
||||
| `fullnameOverride` | String to fully override `descheduler.fullname` template | `""` |
|
||||
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
|
||||
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
|
||||
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
|
||||
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
|
||||
| `rbac.create` | If `true`, create & use RBAC resources | `true` |
|
||||
| `serviceAccount.create` | If `true`, create a service account for the cron job | `true` |
|
||||
| `serviceAccount.name` | The name of the service account to use, if not set and create is true a name is generated using the fullname template | `nil` |
|
||||
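As a sketch of how several parameters from the configuration table above combine, the following override file (illustrative values only, not the chart defaults) runs the descheduler as a Deployment with leader election enabled:

```yaml
# values-override.yaml -- hypothetical override file; parameter names come from the table above
kind: Deployment
replicas: 2
deschedulingInterval: 5m
leaderElection:
  enabled: true
cmdOptions:
  v: 3
```

```shell
helm install my-release --namespace kube-system -f values-override.yaml descheduler/descheduler
```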
|
||||
@@ -1,7 +1 @@
|
||||
Descheduler installed as a {{ .Values.kind }}.
|
||||
|
||||
{{- if eq .Values.kind "Deployment" }}
|
||||
{{- if eq .Values.replicas 1.0}}
|
||||
WARNING: You have set the replica count to 1 and the workload kind to Deployment, but leaderElection is not enabled. Consider enabling leader election for HA mode.
|
||||
{{- end}}
|
||||
{{- end}}
|
||||
Descheduler installed as a cron job.
|
||||
|
||||
@@ -42,17 +42,6 @@ app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
|
||||
{{- end }}
|
||||
app.kubernetes.io/managed-by: {{ .Release.Service }}
|
||||
{{- if .Values.commonLabels}}
|
||||
{{ toYaml .Values.commonLabels }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Selector labels
|
||||
*/}}
|
||||
{{- define "descheduler.selectorLabels" -}}
|
||||
app.kubernetes.io/name: {{ include "descheduler.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
@@ -65,30 +54,3 @@ Create the name of the service account to use
|
||||
{{ default "default" .Values.serviceAccount.name }}
|
||||
{{- end -}}
|
||||
{{- end -}}
|
||||
|
||||
{{/*
|
||||
Leader Election
|
||||
*/}}
|
||||
{{- define "descheduler.leaderElection"}}
|
||||
{{- if .Values.leaderElection -}}
|
||||
- --leader-elect={{ .Values.leaderElection.enabled }}
|
||||
{{- if .Values.leaderElection.leaseDuration }}
|
||||
- --leader-elect-lease-duration={{ .Values.leaderElection.leaseDuration }}
|
||||
{{- end }}
|
||||
{{- if .Values.leaderElection.renewDeadline }}
|
||||
- --leader-elect-renew-deadline={{ .Values.leaderElection.renewDeadline }}
|
||||
{{- end }}
|
||||
{{- if .Values.leaderElection.retryPeriod }}
|
||||
- --leader-elect-retry-period={{ .Values.leaderElection.retryPeriod }}
|
||||
{{- end }}
|
||||
{{- if .Values.leaderElection.resourceLock }}
|
||||
- --leader-elect-resource-lock={{ .Values.leaderElection.resourceLock }}
|
||||
{{- end }}
|
||||
{{- if .Values.leaderElection.resourceName }}
|
||||
- --leader-elect-resource-name={{ .Values.leaderElection.resourceName }}
|
||||
{{- end }}
|
||||
{{- if .Values.leaderElection.resourceNamescape }}
|
||||
- --leader-elect-resource-namespace={{ .Values.leaderElection.resourceNamescape }}
|
||||
{{- end -}}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
|
||||
@@ -12,32 +12,10 @@ rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: [""]
|
||||
resources: ["namespaces"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods"]
|
||||
verbs: ["get", "watch", "list", "delete"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods/eviction"]
|
||||
verbs: ["create"]
|
||||
- apiGroups: ["scheduling.k8s.io"]
|
||||
resources: ["priorityclasses"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
{{- if .Values.leaderElection.enabled }}
|
||||
- apiGroups: ["coordination.k8s.io"]
|
||||
resources: ["leases"]
|
||||
verbs: ["create", "update"]
|
||||
- apiGroups: ["coordination.k8s.io"]
|
||||
resources: ["leases"]
|
||||
resourceNames: ["descheduler"]
|
||||
verbs: ["get", "patch", "delete"]
|
||||
{{- end }}
|
||||
{{- if .Values.podSecurityPolicy.create }}
|
||||
- apiGroups: ['policy']
|
||||
resources: ['podsecuritypolicies']
|
||||
verbs: ['use']
|
||||
resourceNames:
|
||||
- {{ template "descheduler.fullname" . }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
|
||||
@@ -2,7 +2,6 @@ apiVersion: v1
|
||||
kind: ConfigMap
|
||||
metadata:
|
||||
name: {{ template "descheduler.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "descheduler.labels" . | nindent 4 }}
|
||||
data:
|
||||
|
||||
@@ -1,26 +1,12 @@
|
||||
{{- if eq .Values.kind "CronJob" }}
|
||||
apiVersion: {{ .Values.cronJobApiVersion | default "batch/v1" }}
|
||||
apiVersion: batch/v1beta1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: {{ template "descheduler.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "descheduler.labels" . | nindent 4 }}
|
||||
spec:
|
||||
schedule: {{ .Values.schedule | quote }}
|
||||
{{- if .Values.suspend }}
|
||||
suspend: {{ .Values.suspend }}
|
||||
{{- end }}
|
||||
concurrencyPolicy: "Forbid"
|
||||
{{- if .Values.startingDeadlineSeconds }}
|
||||
startingDeadlineSeconds: {{ .Values.startingDeadlineSeconds }}
|
||||
{{- end }}
|
||||
{{- if .Values.successfulJobsHistoryLimit }}
|
||||
successfulJobsHistoryLimit: {{ .Values.successfulJobsHistoryLimit }}
|
||||
{{- end }}
|
||||
{{- if .Values.failedJobsHistoryLimit }}
|
||||
failedJobsHistoryLimit: {{ .Values.failedJobsHistoryLimit }}
|
||||
{{- end }}
|
||||
jobTemplate:
|
||||
spec:
|
||||
template:
|
||||
@@ -32,32 +18,17 @@ spec:
|
||||
{{- .Values.podAnnotations | toYaml | nindent 12 }}
|
||||
{{- end }}
|
||||
labels:
|
||||
{{- include "descheduler.selectorLabels" . | nindent 12 }}
|
||||
app.kubernetes.io/name: {{ include "descheduler.name" . }}
|
||||
app.kubernetes.io/instance: {{ .Release.Name }}
|
||||
{{- if .Values.podLabels }}
|
||||
{{- .Values.podLabels | toYaml | nindent 12 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 12 }}
|
||||
{{- end }}
|
||||
{{- if .Values.priorityClassName }}
|
||||
priorityClassName: {{ .Values.priorityClassName }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ template "descheduler.serviceAccountName" . }}
|
||||
restartPolicy: "Never"
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
|
||||
@@ -73,18 +44,6 @@ spec:
|
||||
- {{ $value | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
livenessProbe:
|
||||
{{- toYaml .Values.livenessProbe | nindent 16 }}
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 16 }}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
privileged: false
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
volumeMounts:
|
||||
- mountPath: /policy-dir
|
||||
name: policy-volume
|
||||
@@ -92,4 +51,3 @@ spec:
|
||||
- name: policy-volume
|
||||
configMap:
|
||||
name: {{ template "descheduler.fullname" . }}
|
||||
{{- end }}
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
{{- if eq .Values.kind "Deployment" }}
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: {{ template "descheduler.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "descheduler.labels" . | nindent 4 }}
|
||||
spec:
|
||||
{{- if gt .Values.replicas 1.0}}
|
||||
{{- if not .Values.leaderElection.enabled }}
|
||||
{{- fail "You must set leaderElection to use more than 1 replica"}}
|
||||
{{- end}}
|
||||
replicas: {{ required "leaderElection required for running more than one replica" .Values.replicas }}
|
||||
{{- else }}
|
||||
replicas: 1
|
||||
{{- end }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "descheduler.selectorLabels" . | nindent 6 }}
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "descheduler.selectorLabels" . | nindent 8 }}
|
||||
{{- if .Values.podLabels }}
|
||||
{{- .Values.podLabels | toYaml | nindent 8 }}
|
||||
{{- end }}
|
||||
annotations:
|
||||
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
|
||||
{{- if .Values.podAnnotations }}
|
||||
{{- .Values.podAnnotations | toYaml | nindent 8 }}
|
||||
{{- end }}
|
||||
spec:
|
||||
{{- if .Values.priorityClassName }}
|
||||
priorityClassName: {{ .Values.priorityClassName }}
|
||||
{{- end }}
|
||||
serviceAccountName: {{ template "descheduler.serviceAccountName" . }}
|
||||
{{- with .Values.imagePullSecrets }}
|
||||
imagePullSecrets:
|
||||
{{- toYaml . | nindent 10 }}
|
||||
{{- end }}
|
||||
containers:
|
||||
- name: {{ .Chart.Name }}
|
||||
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
|
||||
imagePullPolicy: {{ .Values.image.pullPolicy }}
|
||||
command:
|
||||
- "/bin/descheduler"
|
||||
args:
|
||||
- "--policy-config-file"
|
||||
- "/policy-dir/policy.yaml"
|
||||
- "--descheduling-interval"
|
||||
- {{ required "deschedulingInterval required for running as Deployment" .Values.deschedulingInterval }}
|
||||
{{- range $key, $value := .Values.cmdOptions }}
|
||||
- {{ printf "--%s" $key | quote }}
|
||||
{{- if $value }}
|
||||
- {{ $value | quote }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- include "descheduler.leaderElection" . | nindent 12 }}
|
||||
ports:
|
||||
- containerPort: 10258
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
{{- toYaml .Values.livenessProbe | nindent 12 }}
|
||||
resources:
|
||||
{{- toYaml .Values.resources | nindent 12 }}
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
privileged: false
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
volumeMounts:
|
||||
- mountPath: /policy-dir
|
||||
name: policy-volume
|
||||
volumes:
|
||||
- name: policy-volume
|
||||
configMap:
|
||||
name: {{ template "descheduler.fullname" . }}
|
||||
{{- with .Values.nodeSelector }}
|
||||
nodeSelector:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.affinity }}
|
||||
affinity:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- with .Values.tolerations }}
|
||||
tolerations:
|
||||
{{- toYaml . | nindent 8 }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -1,39 +0,0 @@
|
||||
{{- if .Values.podSecurityPolicy.create -}}
|
||||
apiVersion: policy/v1beta1
|
||||
kind: PodSecurityPolicy
|
||||
metadata:
|
||||
name: {{ template "descheduler.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
annotations:
|
||||
seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'docker/default,runtime/default'
|
||||
seccomp.security.alpha.kubernetes.io/defaultProfileName: 'runtime/default'
|
||||
spec:
|
||||
privileged: false
|
||||
allowPrivilegeEscalation: false
|
||||
requiredDropCapabilities:
|
||||
- ALL
|
||||
volumes:
|
||||
- 'configMap'
|
||||
- 'secret'
|
||||
hostNetwork: false
|
||||
hostIPC: false
|
||||
hostPID: false
|
||||
runAsUser:
|
||||
rule: 'MustRunAs'
|
||||
ranges:
|
||||
- min: 1
|
||||
max: 65535
|
||||
seLinux:
|
||||
rule: 'RunAsAny'
|
||||
supplementalGroups:
|
||||
rule: 'MustRunAs'
|
||||
ranges:
|
||||
- min: 1
|
||||
max: 65535
|
||||
fsGroup:
|
||||
rule: 'MustRunAs'
|
||||
ranges:
|
||||
- min: 1
|
||||
max: 65535
|
||||
readOnlyRootFilesystem: true
|
||||
{{- end -}}
|
||||
@@ -1,21 +0,0 @@
|
||||
{{- if eq .Values.kind "Deployment" }}
|
||||
{{- if eq .Values.service.enabled true }}
|
||||
apiVersion: v1
|
||||
kind: Service
|
||||
metadata:
|
||||
labels:
|
||||
{{- include "descheduler.labels" . | nindent 4 }}
|
||||
name: {{ template "descheduler.fullname" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
spec:
|
||||
clusterIP: None
|
||||
ports:
|
||||
- name: http-metrics
|
||||
port: 10258
|
||||
protocol: TCP
|
||||
targetPort: 10258
|
||||
selector:
|
||||
{{- include "descheduler.selectorLabels" . | nindent 4 }}
|
||||
type: ClusterIP
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -3,10 +3,6 @@ apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
metadata:
|
||||
name: {{ template "descheduler.serviceAccountName" . }}
|
||||
namespace: {{ .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "descheduler.labels" . | nindent 4 }}
|
||||
{{- if .Values.serviceAccount.annotations }}
|
||||
annotations: {{ toYaml .Values.serviceAccount.annotations | nindent 4 }}
|
||||
{{- end }}
|
||||
{{- end -}}
|
||||
|
||||
@@ -1,41 +0,0 @@
|
||||
{{- if eq .Values.kind "Deployment" }}
|
||||
{{- if eq .Values.serviceMonitor.enabled true }}
|
||||
apiVersion: monitoring.coreos.com/v1
|
||||
kind: ServiceMonitor
|
||||
metadata:
|
||||
name: {{ template "descheduler.fullname" . }}-servicemonitor
|
||||
namespace: {{ .Values.serviceMonitor.namespace | default .Release.Namespace }}
|
||||
labels:
|
||||
{{- include "descheduler.labels" . | nindent 4 }}
|
||||
spec:
|
||||
jobLabel: jobLabel
|
||||
namespaceSelector:
|
||||
matchNames:
|
||||
- {{ .Release.Namespace }}
|
||||
selector:
|
||||
matchLabels:
|
||||
{{- include "descheduler.selectorLabels" . | nindent 6 }}
|
||||
endpoints:
|
||||
- honorLabels: {{ .Values.serviceMonitor.honorLabels | default true }}
|
||||
port: http-metrics
|
||||
{{- if .Values.serviceMonitor.interval }}
|
||||
interval: {{ .Values.serviceMonitor.interval }}
|
||||
{{- end }}
|
||||
scheme: https
|
||||
tlsConfig:
|
||||
{{- if eq .Values.serviceMonitor.insecureSkipVerify true }}
|
||||
insecureSkipVerify: true
|
||||
{{- end }}
|
||||
{{- if .Values.serviceMonitor.serverName }}
|
||||
serverName: {{ .Values.serviceMonitor.serverName }}
|
||||
{{- end}}
|
||||
{{- if .Values.serviceMonitor.metricRelabelings }}
|
||||
metricRelabelings:
|
||||
{{ tpl (toYaml .Values.serviceMonitor.metricRelabelings | indent 4) . }}
|
||||
{{- end }}
|
||||
{{- if .Values.serviceMonitor.relabelings }}
|
||||
relabelings:
|
||||
{{ tpl (toYaml .Values.serviceMonitor.relabelings | indent 4) . }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
{{- end }}
|
||||
@@ -1,30 +0,0 @@
|
||||
apiVersion: v1
|
||||
kind: Pod
|
||||
metadata:
|
||||
name: descheduler-test-pod
|
||||
namespace: {{ .Release.Namespace }}
|
||||
annotations:
|
||||
"helm.sh/hook": test
|
||||
spec:
|
||||
restartPolicy: Never
|
||||
serviceAccountName: descheduler-ci
|
||||
containers:
|
||||
- name: descheduler-test-container
|
||||
image: alpine:latest
|
||||
imagePullPolicy: IfNotPresent
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- All
|
||||
privileged: false
|
||||
runAsNonRoot: false
|
||||
command: ["/bin/ash"]
|
||||
args:
|
||||
- -c
|
||||
- >-
|
||||
apk --no-cache add curl &&
|
||||
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl &&
|
||||
chmod +x ./kubectl &&
|
||||
mv ./kubectl /usr/local/bin/kubectl &&
|
||||
/usr/local/bin/kubectl get pods --namespace {{ .Release.Namespace }} --token "$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" | grep "descheduler" | grep "Completed"
|
||||
@@ -2,68 +2,24 @@
|
||||
# This is a YAML-formatted file.
|
||||
# Declare variables to be passed into your templates.
|
||||
|
||||
# CronJob or Deployment
|
||||
kind: CronJob
|
||||
|
||||
image:
|
||||
repository: k8s.gcr.io/descheduler/descheduler
|
||||
repository: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler
|
||||
# Overrides the image tag whose default is the chart version
|
||||
tag: ""
|
||||
pullPolicy: IfNotPresent
|
||||
|
||||
imagePullSecrets:
|
||||
# - name: container-registry-secret
|
||||
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 256Mi
|
||||
# limits:
|
||||
# cpu: 100m
|
||||
# memory: 128Mi
|
||||
|
||||
nameOverride: ""
|
||||
fullnameOverride: ""
|
||||
|
||||
# labels that'll be applied to all resources
|
||||
commonLabels: {}
|
||||
|
||||
cronJobApiVersion: "batch/v1" # Use "batch/v1beta1" for k8s version < 1.21.0. TODO(@7i) remove with 1.23 release
|
||||
schedule: "*/2 * * * *"
|
||||
suspend: false
|
||||
#startingDeadlineSeconds: 200
|
||||
#successfulJobsHistoryLimit: 1
|
||||
#failedJobsHistoryLimit: 1
|
||||
|
||||
# Required when running as a Deployment
|
||||
deschedulingInterval: 5m
|
||||
|
||||
# Specifies the replica count for Deployment
|
||||
# Set leaderElection if you want to use more than 1 replica
|
||||
# Set affinity.podAntiAffinity rule if you want to schedule onto a node
|
||||
# only if that node is in the same zone as at least one already-running descheduler
|
||||
replicas: 1
|
||||
|
||||
# Specifies whether Leader Election resources should be created
|
||||
# Required when running as a Deployment
|
||||
leaderElection: {}
|
||||
# enabled: true
|
||||
# leaseDuration: 15s
|
||||
# renewDeadline: 10s
|
||||
# retryPeriod: 2s
|
||||
# resourceLock: "leases"
|
||||
# resourceName: "descheduler"
|
||||
# resourceNamescape: "kube-system"
|
||||
|
||||
cmdOptions:
|
||||
v: 3
|
||||
# evict-local-storage-pods:
|
||||
# max-pods-to-evict-per-node: 10
|
||||
# node-selector: "key1=value1,key2=value2"
|
||||
|
||||
deschedulerPolicy:
|
||||
# nodeSelector: "key1=value1,key2=value2"
|
||||
# maxNoOfPodsToEvictPerNode: 10
|
||||
# maxNoOfPodsToEvictPerNamespace: 10
|
||||
# ignorePvcPods: true
|
||||
# evictLocalStoragePods: true
|
||||
strategies:
|
||||
RemoveDuplicates:
|
||||
enabled: true
|
||||
@@ -72,8 +28,8 @@ deschedulerPolicy:
|
||||
RemovePodsViolatingNodeAffinity:
|
||||
enabled: true
|
||||
params:
|
||||
nodeAffinityType:
|
||||
- requiredDuringSchedulingIgnoredDuringExecution
|
||||
nodeAffinityType:
|
||||
- requiredDuringSchedulingIgnoredDuringExecution
|
||||
RemovePodsViolatingInterPodAntiAffinity:
|
||||
enabled: true
|
||||
LowNodeUtilization:
|
||||
@@ -91,83 +47,13 @@ deschedulerPolicy:
|
||||
|
||||
priorityClassName: system-cluster-critical
|
||||
|
||||
nodeSelector: {}
|
||||
# foo: bar
|
||||
|
||||
affinity: {}
|
||||
# nodeAffinity:
|
||||
# requiredDuringSchedulingIgnoredDuringExecution:
|
||||
# nodeSelectorTerms:
|
||||
# - matchExpressions:
|
||||
# - key: kubernetes.io/e2e-az-name
|
||||
# operator: In
|
||||
# values:
|
||||
# - e2e-az1
|
||||
# - e2e-az2
|
||||
# podAntiAffinity:
|
||||
# requiredDuringSchedulingIgnoredDuringExecution:
|
||||
# - labelSelector:
|
||||
# matchExpressions:
|
||||
# - key: app.kubernetes.io/name
|
||||
# operator: In
|
||||
# values:
|
||||
# - descheduler
|
||||
# topologyKey: "kubernetes.io/hostname"
|
||||
tolerations: []
|
||||
# - key: 'management'
|
||||
# operator: 'Equal'
|
||||
# value: 'tool'
|
||||
# effect: 'NoSchedule'
|
||||
|
||||
rbac:
|
||||
# Specifies whether RBAC resources should be created
|
||||
create: true
|
||||
|
||||
podSecurityPolicy:
|
||||
# Specifies whether PodSecurityPolicy should be created.
|
||||
create: true
|
||||
|
||||
serviceAccount:
|
||||
# Specifies whether a ServiceAccount should be created
|
||||
create: true
|
||||
# The name of the ServiceAccount to use.
|
||||
# If not set and create is true, a name is generated using the fullname template
|
||||
name:
|
||||
# Specifies custom annotations for the serviceAccount
|
||||
annotations: {}
|
||||
|
||||
podAnnotations: {}
|
||||
|
||||
podLabels: {}
|
||||
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10258
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
|
||||
service:
|
||||
enabled: false
|
||||
|
||||
serviceMonitor:
|
||||
enabled: false
|
||||
# The namespace where Prometheus expects to find service monitors.
|
||||
# namespace: ""
|
||||
interval: ""
|
||||
# honorLabels: true
|
||||
insecureSkipVerify: true
|
||||
serverName: null
|
||||
metricRelabelings: []
|
||||
# - action: keep
|
||||
# regex: 'descheduler_(build_info|pods_evicted)'
|
||||
# sourceLabels: [__name__]
|
||||
relabelings: []
|
||||
# - sourceLabels: [__meta_kubernetes_pod_node_name]
|
||||
# separator: ;
|
||||
# regex: ^(.*)$
|
||||
# targetLabel: nodename
|
||||
# replacement: $1
|
||||
# action: replace
|
||||
|
||||
@@ -7,14 +7,14 @@ timeout: 1200s
|
||||
options:
|
||||
substitution_option: ALLOW_LOOSE
|
||||
steps:
|
||||
- name: 'gcr.io/k8s-staging-test-infra/gcb-docker-gcloud:v20211118-2f2d816b90'
|
||||
- name: 'gcr.io/k8s-testimages/gcb-docker-gcloud:v20190906-745fed4'
|
||||
entrypoint: make
|
||||
env:
|
||||
- DOCKER_CLI_EXPERIMENTAL=enabled
|
||||
- VERSION=$_GIT_TAG
|
||||
- BASE_REF=$_PULL_BASE_REF
|
||||
args:
|
||||
- push-all
|
||||
- push
|
||||
substitutions:
|
||||
# _GIT_TAG will be filled with a git-based tag for the image, of the form vYYYYMMDD-hash, and
|
||||
# can be used as a substitution
|
||||
|
||||
@@ -18,77 +18,44 @@ limitations under the License.
|
||||
package options
|
||||
|
||||
import (
|
||||
"github.com/spf13/pflag"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
apiserveroptions "k8s.io/apiserver/pkg/server/options"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
componentbaseconfig "k8s.io/component-base/config"
|
||||
componentbaseoptions "k8s.io/component-base/config/options"
|
||||
|
||||
// install the componentconfig api so we get its defaulting and conversion functions
|
||||
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
|
||||
"sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
|
||||
deschedulerscheme "sigs.k8s.io/descheduler/pkg/descheduler/scheme"
|
||||
"time"
|
||||
)
|
||||
|
||||
const (
|
||||
DefaultDeschedulerPort = 10258
|
||||
"github.com/spf13/pflag"
|
||||
)
|
||||
|
||||
// DeschedulerServer configuration
|
||||
type DeschedulerServer struct {
|
||||
componentconfig.DeschedulerConfiguration
|
||||
|
||||
Client clientset.Interface
|
||||
SecureServing *apiserveroptions.SecureServingOptionsWithLoopback
|
||||
DisableMetrics bool
|
||||
Client clientset.Interface
|
||||
}
|
||||
|
||||
// NewDeschedulerServer creates a new DeschedulerServer with default parameters
|
||||
func NewDeschedulerServer() (*DeschedulerServer, error) {
|
||||
cfg, err := newDefaultComponentConfig()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
secureServing := apiserveroptions.NewSecureServingOptions().WithLoopback()
|
||||
secureServing.BindPort = DefaultDeschedulerPort
|
||||
|
||||
return &DeschedulerServer{
|
||||
DeschedulerConfiguration: *cfg,
|
||||
SecureServing: secureServing,
|
||||
}, nil
|
||||
}
|
||||
|
||||
func newDefaultComponentConfig() (*componentconfig.DeschedulerConfiguration, error) {
|
||||
versionedCfg := v1alpha1.DeschedulerConfiguration{
|
||||
LeaderElection: componentbaseconfig.LeaderElectionConfiguration{
|
||||
LeaderElect: false,
|
||||
LeaseDuration: metav1.Duration{Duration: 137 * time.Second},
|
||||
RenewDeadline: metav1.Duration{Duration: 107 * time.Second},
|
||||
RetryPeriod: metav1.Duration{Duration: 26 * time.Second},
|
||||
ResourceLock: "leases",
|
||||
ResourceName: "descheduler",
|
||||
ResourceNamespace: "kube-system",
|
||||
},
|
||||
}
|
||||
deschedulerscheme.Scheme.Default(&versionedCfg)
|
||||
func NewDeschedulerServer() *DeschedulerServer {
|
||||
versioned := v1alpha1.DeschedulerConfiguration{}
|
||||
deschedulerscheme.Scheme.Default(&versioned)
|
||||
cfg := componentconfig.DeschedulerConfiguration{}
|
||||
if err := deschedulerscheme.Scheme.Convert(&versionedCfg, &cfg, nil); err != nil {
|
||||
return nil, err
|
||||
deschedulerscheme.Scheme.Convert(versioned, &cfg, nil)
|
||||
s := DeschedulerServer{
|
||||
DeschedulerConfiguration: cfg,
|
||||
}
|
||||
return &cfg, nil
|
||||
return &s
|
||||
}
|
||||
|
||||
// AddFlags adds flags for a specific SchedulerServer to the specified FlagSet
|
||||
func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
|
||||
fs.StringVar(&rs.Logging.Format, "logging-format", "text", `Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log-dir, --log-file, --log-file-max-size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning.`)
|
||||
fs.DurationVar(&rs.DeschedulingInterval, "descheduling-interval", rs.DeschedulingInterval, "Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.")
|
||||
fs.StringVar(&rs.KubeconfigFile, "kubeconfig", rs.KubeconfigFile, "File with kube configuration.")
|
||||
fs.StringVar(&rs.PolicyConfigFile, "policy-config-file", rs.PolicyConfigFile, "File with descheduler policy configuration.")
|
||||
fs.BoolVar(&rs.DryRun, "dry-run", rs.DryRun, "execute descheduler in dry run mode.")
|
||||
fs.BoolVar(&rs.DisableMetrics, "disable-metrics", rs.DisableMetrics, "Disables metrics. The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.")
|
||||
|
||||
componentbaseoptions.BindLeaderElectionFlags(&rs.LeaderElection, fs)
|
||||
|
||||
rs.SecureServing.AddFlags(fs)
|
||||
// node-selector query causes descheduler to run only on nodes that matches the node labels in the query
|
||||
fs.StringVar(&rs.NodeSelector, "node-selector", rs.NodeSelector, "Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)")
|
||||
// max-no-pods-to-evict limits the maximum number of pods to be evicted per node by descheduler.
|
||||
fs.IntVar(&rs.MaxNoOfPodsToEvictPerNode, "max-pods-to-evict-per-node", rs.MaxNoOfPodsToEvictPerNode, "Limits the maximum number of pods to be evicted per node by descheduler")
|
||||
// evict-local-storage-pods allows eviction of pods that are using local storage. This is false by default.
|
||||
fs.BoolVar(&rs.EvictLocalStoragePods, "evict-local-storage-pods", rs.EvictLocalStoragePods, "Enables evicting pods using local storage by descheduler")
|
||||
}
|
||||
|
||||
@@ -18,91 +18,44 @@ limitations under the License.
|
||||
package app
|
||||
|
||||
import (
|
||||
"context"
|
||||
"flag"
|
||||
"io"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
|
||||
"k8s.io/apiserver/pkg/server/healthz"
|
||||
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
|
||||
apiserver "k8s.io/apiserver/pkg/server"
|
||||
"k8s.io/apiserver/pkg/server/mux"
|
||||
restclient "k8s.io/client-go/rest"
|
||||
"k8s.io/component-base/config"
|
||||
_ "k8s.io/component-base/logs/json/register"
|
||||
"k8s.io/component-base/logs/registry"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
"k8s.io/klog/v2"
|
||||
aflag "k8s.io/component-base/cli/flag"
|
||||
"k8s.io/component-base/logs"
|
||||
"k8s.io/klog"
|
||||
)
|
||||
|
||||
// NewDeschedulerCommand creates a *cobra.Command object with default parameters
|
||||
func NewDeschedulerCommand(out io.Writer) *cobra.Command {
|
||||
s, err := options.NewDeschedulerServer()
|
||||
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "unable to initialize server")
|
||||
}
|
||||
|
||||
s := options.NewDeschedulerServer()
|
||||
cmd := &cobra.Command{
|
||||
Use: "descheduler",
|
||||
Short: "descheduler",
|
||||
Long: `The descheduler evicts pods which may be bound to less desired nodes`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
// s.Logs.Config.Format = s.Logging.Format
|
||||
|
||||
// LoopbackClientConfig is a config for a privileged loopback connection
|
||||
var LoopbackClientConfig *restclient.Config
|
||||
var SecureServing *apiserver.SecureServingInfo
|
||||
if err := s.SecureServing.ApplyTo(&SecureServing, &LoopbackClientConfig); err != nil {
|
||||
klog.ErrorS(err, "failed to apply secure server configuration")
|
||||
return
|
||||
}
|
||||
|
||||
factory, _ := registry.LogRegistry.Get(s.Logging.Format)
|
||||
if factory == nil {
|
||||
klog.ClearLogger()
|
||||
} else {
|
||||
log, logrFlush := factory.Create(config.LoggingConfiguration{})
|
||||
defer logrFlush()
|
||||
klog.SetLogger(log)
|
||||
}
|
||||
|
||||
ctx, done := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
|
||||
|
||||
pathRecorderMux := mux.NewPathRecorderMux("descheduler")
|
||||
if !s.DisableMetrics {
|
||||
pathRecorderMux.Handle("/metrics", legacyregistry.HandlerWithReset())
|
||||
}
|
||||
|
||||
healthz.InstallHandler(pathRecorderMux, healthz.NamedCheck("Descheduler", healthz.PingHealthz.Check))
|
||||
|
||||
stoppedCh, _, err := SecureServing.Serve(pathRecorderMux, 0, ctx.Done())
|
||||
logs.InitLogs()
|
||||
defer logs.FlushLogs()
|
||||
err := Run(s)
|
||||
if err != nil {
|
||||
klog.Fatalf("failed to start secure server: %v", err)
|
||||
return
|
||||
klog.Errorf("%v", err)
|
||||
}
|
||||
|
||||
err = Run(ctx, s)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "descheduler server")
|
||||
}
|
||||
|
||||
done()
|
||||
// wait for metrics server to close
|
||||
<-stoppedCh
|
||||
},
|
||||
}
|
||||
cmd.SetOut(out)
|
||||
cmd.SetOutput(out)
|
||||
|
||||
flags := cmd.Flags()
|
||||
flags.SetNormalizeFunc(aflag.WordSepNormalizeFunc)
|
||||
flags.AddGoFlagSet(flag.CommandLine)
|
||||
s.AddFlags(flags)
|
||||
return cmd
|
||||
}
|
||||
|
||||
func Run(ctx context.Context, rs *options.DeschedulerServer) error {
|
||||
return descheduler.Run(ctx, rs)
|
||||
func Run(rs *options.DeschedulerServer) error {
|
||||
return descheduler.Run(rs)
|
||||
}
|
||||
|
||||
@@ -18,19 +18,70 @@ package app
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"sigs.k8s.io/descheduler/pkg/version"
|
||||
)
|
||||
|
||||
var (
|
||||
// gitCommit is a constant representing the source version that
|
||||
// generated this build. It should be set during build via -ldflags.
|
||||
gitCommit string
|
||||
// version is a constant representing the version tag that
|
||||
// generated this build. It should be set during build via -ldflags.
|
||||
version string
|
||||
// buildDate in ISO8601 format, output of $(date -u +'%Y-%m-%dT%H:%M:%SZ')
|
||||
// It should be set during build via -ldflags.
|
||||
buildDate string
|
||||
)
|
||||
|
||||
// Info holds the information related to descheduler app version.
|
||||
type Info struct {
|
||||
Major string `json:"major"`
|
||||
Minor string `json:"minor"`
|
||||
GitCommit string `json:"gitCommit"`
|
||||
GitVersion string `json:"gitVersion"`
|
||||
BuildDate string `json:"buildDate"`
|
||||
GoVersion string `json:"goVersion"`
|
||||
Compiler string `json:"compiler"`
|
||||
Platform string `json:"platform"`
|
||||
}
|
||||
|
||||
// Get returns the overall codebase version. It's for detecting
|
||||
// what code a binary was built from.
|
||||
func Get() Info {
|
||||
majorVersion, minorVersion := splitVersion(version)
|
||||
return Info{
|
||||
Major: majorVersion,
|
||||
Minor: minorVersion,
|
||||
GitCommit: gitCommit,
|
||||
GitVersion: version,
|
||||
BuildDate: buildDate,
|
||||
GoVersion: runtime.Version(),
|
||||
Compiler: runtime.Compiler,
|
||||
Platform: fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH),
|
||||
}
|
||||
}
|
||||
|
||||
func NewVersionCommand() *cobra.Command {
|
||||
var versionCmd = &cobra.Command{
|
||||
Use: "version",
|
||||
Short: "Version of descheduler",
|
||||
Long: `Prints the version of descheduler.`,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
fmt.Printf("Descheduler version %+v\n", version.Get())
|
||||
fmt.Printf("Descheduler version %+v\n", Get())
|
||||
},
|
||||
}
|
||||
return versionCmd
|
||||
}
|
||||
|
||||
// splitVersion splits the git version to generate major and minor versions needed.
|
||||
func splitVersion(version string) (string, string) {
|
||||
if version == "" {
|
||||
return "", ""
|
||||
}
|
||||
// A sample version would be of form v0.1.0-7-ge884046, so split at first '.' and
|
||||
// then return 0 and 1+(+ appended to follow semver convention) for major and minor versions.
|
||||
return strings.Trim(strings.Split(version, ".")[0], "v"), strings.Split(version, ".")[1] + "+"
|
||||
}
|
||||
|
||||
@@ -17,23 +17,20 @@ limitations under the License.
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"k8s.io/component-base/cli"
|
||||
"k8s.io/klog/v2"
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app"
|
||||
)
|
||||
|
||||
func init() {
|
||||
klog.SetOutput(os.Stdout)
|
||||
klog.InitFlags(nil)
|
||||
}
|
||||
|
||||
func main() {
|
||||
out := os.Stdout
|
||||
cmd := app.NewDeschedulerCommand(out)
|
||||
cmd.AddCommand(app.NewVersionCommand())
|
||||
|
||||
code := cli.Run(cmd)
|
||||
os.Exit(code)
|
||||
flag.CommandLine.Parse([]string{})
|
||||
if err := cmd.Execute(); err != nil {
|
||||
fmt.Println(err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,10 +3,10 @@
|
||||
## Required Tools
|
||||
|
||||
- [Git](https://git-scm.com/downloads)
|
||||
- [Go 1.16+](https://golang.org/dl/)
|
||||
- [Go 1.13+](https://golang.org/dl/)
|
||||
- [Docker](https://docs.docker.com/install/)
|
||||
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl)
|
||||
- [kind v0.10.0+](https://kind.sigs.k8s.io/)
|
||||
- [kind](https://kind.sigs.k8s.io/)
|
||||
|
||||
## Build and Run
|
||||
|
||||
@@ -34,23 +34,9 @@ GOOS=linux make dev-image
|
||||
kind create cluster --config hack/kind_config.yaml
|
||||
kind load docker-image <image name>
|
||||
kind get kubeconfig > /tmp/admin.conf
|
||||
export KUBECONFIG=/tmp/admin.conf
|
||||
make test-unit
|
||||
make test-e2e
|
||||
```
|
||||
|
||||
## Run Helm Tests
|
||||
Run the Helm tests for a particular descheduler release by setting the variables below,
|
||||
```
|
||||
HELM_IMAGE_REPO="descheduler"
|
||||
HELM_IMAGE_TAG="helm-test"
|
||||
HELM_CHART_LOCATION="./charts/descheduler"
|
||||
```
|
||||
The Helm tests run as part of the descheduler CI. To run them manually from the descheduler root,
|
||||
|
||||
```
|
||||
make test-helm
|
||||
```
|
||||
|
||||
### Miscellaneous
|
||||
See the [hack directory](https://github.com/kubernetes-sigs/descheduler/tree/master/hack) for additional tools and scripts used for developing the descheduler.
|
||||
@@ -1,16 +0,0 @@
|
||||
# Proposals
|
||||
This document walks you through all the enhancement proposals for the descheduler.
|
||||
|
||||
## Descheduler v1alpha2 Design Proposal
|
||||
```yaml
|
||||
title: Descheduler v1alpha2 Design Proposal
|
||||
authors:
|
||||
- "@damemi"
|
||||
link:
|
||||
- https://docs.google.com/document/d/1S1JCh-0F-QCJvBBG-kbmXiHAJFF8doArhDIAKbOj93I/edit#heading=h.imbp1ctnc8lx
|
||||
- https://github.com/kubernetes-sigs/descheduler/issues/679
|
||||
owning-sig: sig-scheduling
|
||||
creation-date: 2021-05-01
|
||||
status: implementable
|
||||
```
|
||||
|
||||
@@ -9,7 +9,7 @@
|
||||
3. Push the release branch to the descheduler repo and ensure branch protection is enabled (not required for patch releases)
|
||||
4. Tag the repository from the `master` branch (from the `release-1.18` branch for a patch release) and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
|
||||
5. Publish a draft release using the tag you just created
|
||||
6. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/main/k8s.gcr.io#image-promoter)
|
||||
6. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/master/k8s.gcr.io#image-promoter)
|
||||
7. Publish release
|
||||
8. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
|
||||
|
||||
@@ -20,9 +20,9 @@
|
||||
3. Push the release branch to the descheduler repo and ensure branch protection is enabled (not required for patch releases)
|
||||
4. Tag the repository from the `master` branch (from the `release-1.18` branch for a patch release) and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
|
||||
5. Checkout the tag you just created and make sure your repo is clean by git's standards `git checkout $VERSION`
|
||||
6. Build and push the container image to the staging registry `VERSION=$VERSION make push-all`
|
||||
6. Build and push the container image to the staging registry `VERSION=$VERSION make push`
|
||||
7. Publish a draft release using the tag you just created
|
||||
8. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/main/k8s.gcr.io#image-promoter)
|
||||
8. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/master/k8s.gcr.io#image-promoter)
|
||||
9. Publish release
|
||||
10. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
|
||||
|
||||
@@ -34,9 +34,6 @@ also tag patch releases (on the release-* branch) *after* pushing changes (if th
|
||||
|
||||
See [post-descheduler-push-images dashboard](https://testgrid.k8s.io/sig-scheduling#post-descheduler-push-images) for staging registry image build job status.
|
||||
|
||||
View the descheduler staging registry using [this URL](https://console.cloud.google.com/gcr/images/k8s-staging-descheduler/GLOBAL/descheduler) in a web browser
|
||||
or use the below `gcloud` commands.
|
||||
|
||||
List images in staging registry.
|
||||
```
|
||||
gcloud container images list --repository gcr.io/k8s-staging-descheduler
|
||||
|
||||
@@ -1,27 +1,9 @@
|
||||
# User Guide
|
||||
|
||||
Starting with descheduler release v0.10.0 container images are available in the official k8s container registry.
|
||||
|
||||
Descheduler Version | Container Image | Architectures |
|
||||
------------------- |--------------------------------------------|-------------------------|
|
||||
v0.24.1             | k8s.gcr.io/descheduler/descheduler:v0.24.1 | AMD64<br>ARM64<br>ARMv7 |
|
||||
v0.24.0 | k8s.gcr.io/descheduler/descheduler:v0.24.0 | AMD64<br>ARM64<br>ARMv7 |
|
||||
v0.23.1 | k8s.gcr.io/descheduler/descheduler:v0.23.1 | AMD64<br>ARM64<br>ARMv7 |
|
||||
v0.22.0 | k8s.gcr.io/descheduler/descheduler:v0.22.0 | AMD64<br>ARM64<br>ARMv7 |
|
||||
v0.21.0 | k8s.gcr.io/descheduler/descheduler:v0.21.0 | AMD64<br>ARM64<br>ARMv7 |
|
||||
v0.20.0 | k8s.gcr.io/descheduler/descheduler:v0.20.0 | AMD64<br>ARM64 |
|
||||
v0.19.0 | k8s.gcr.io/descheduler/descheduler:v0.19.0 | AMD64 |
|
||||
v0.18.0 | k8s.gcr.io/descheduler/descheduler:v0.18.0 | AMD64 |
|
||||
v0.10.0 | k8s.gcr.io/descheduler/descheduler:v0.10.0 | AMD64 |
|
||||
|
||||
Note that multi-arch container images cannot be pulled by [kind](https://kind.sigs.k8s.io) from a registry. Therefore
|
||||
starting with descheduler release v0.20.0 use the below process to download the official descheduler
|
||||
image into a kind cluster.
|
||||
```
|
||||
kind create cluster
|
||||
docker pull k8s.gcr.io/descheduler/descheduler:v0.20.0
|
||||
kind load docker-image k8s.gcr.io/descheduler/descheduler:v0.20.0
|
||||
```
|
||||
Starting with descheduler release v0.10.0 container images are available in these container registries.
|
||||
* `asia.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
|
||||
* `eu.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
|
||||
* `us.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
|
||||
|
||||
## Policy Configuration Examples
|
||||
The [examples](https://github.com/kubernetes-sigs/descheduler/tree/master/examples) directory has descheduler policy configuration examples.
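To try one of these policies outside of a cluster deployment, a minimal sketch (assuming you have built the `descheduler` binary per the contributor guide and saved a policy as `policy.yaml`) is:
```
# Run the descheduler against the cluster in your kubeconfig, in dry-run mode first
descheduler --kubeconfig "$HOME/.kube/config" --policy-config-file policy.yaml --dry-run --v=3
```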
|
||||
@@ -37,52 +19,31 @@ Usage:
|
||||
descheduler [command]
|
||||
|
||||
Available Commands:
|
||||
completion generate the autocompletion script for the specified shell
|
||||
help Help about any command
|
||||
version Version of descheduler
|
||||
|
||||
Flags:
|
||||
--add-dir-header If true, adds the file directory to the header of the log messages (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--alsologtostderr log to standard error as well as files (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--bind-address ip The IP address on which to listen for the --secure-port port. The associated interface(s) must be reachable by the rest of the cluster, and by CLI/web clients. If blank or an unspecified address (0.0.0.0 or ::), all interfaces will be used. (default 0.0.0.0)
|
||||
--cert-dir string The directory where the TLS certs are located. If --tls-cert-file and --tls-private-key-file are provided, this flag will be ignored. (default "apiserver.local.config/certificates")
|
||||
--descheduling-interval duration Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.
|
||||
--disable-metrics Disables metrics. The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.
|
||||
--dry-run execute descheduler in dry run mode.
|
||||
-h, --help help for descheduler
|
||||
--http2-max-streams-per-connection int The limit that the server gives to clients for the maximum number of streams in an HTTP/2 connection. Zero means to use golang's default.
|
||||
--kubeconfig string File with kube configuration.
|
||||
--leader-elect Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.
|
||||
--leader-elect-lease-duration duration The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. (default 15s)
|
||||
--leader-elect-renew-deadline duration The interval between attempts by the acting master to renew a leadership slot before it stops leading. This must be less than or equal to the lease duration. This is only applicable if leader election is enabled. (default 10s)
|
||||
--leader-elect-resource-lock string The type of resource object that is used for locking during leader election. Supported options are 'endpoints', 'configmaps', 'leases', 'endpointsleases' and 'configmapsleases'. (default "leases")
|
||||
--leader-elect-resource-name string The name of resource object that is used for locking during leader election. (default "descheduler")
|
||||
--leader-elect-resource-namespace string The namespace of resource object that is used for locking during leader election. (default "kube-system")
|
||||
--leader-elect-retry-period duration The duration the clients should wait between attempting acquisition and renewal of a leadership. This is only applicable if leader election is enabled. (default 2s)
|
||||
--log-backtrace-at traceLocation when logging hits line file:N, emit a stack trace (default :0) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--log-dir string If non-empty, write log files in this directory (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--log-file string If non-empty, use this log file (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--log-file-max-size uint Defines the maximum size a log file can grow to. Unit is megabytes. If the value is 0, the maximum file size is unlimited. (default 1800) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--log-flush-frequency duration Maximum number of seconds between log flushes (default 5s)
|
||||
--logging-format string Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log-dir, --log-file, --log-file-max-size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning. (default "text")
|
||||
--logtostderr log to standard error instead of files (default true) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--one-output If true, only write logs to their native severity level (vs also writing to each lower severity level) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--permit-address-sharing If true, SO_REUSEADDR will be used when binding the port. This allows binding to wildcard IPs like 0.0.0.0 and specific IPs in parallel, and it avoids waiting for the kernel to release sockets in TIME_WAIT state. [default=false]
|
||||
--permit-port-sharing If true, SO_REUSEPORT will be used when binding the port, which allows more than one instance to bind on the same address and port. [default=false]
|
||||
--policy-config-file string File with descheduler policy configuration.
|
||||
--secure-port int The port on which to serve HTTPS with authentication and authorization. If 0, don't serve HTTPS at all. (default 10258)
|
||||
--skip-headers If true, avoid header prefixes in the log messages (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--skip-log-headers If true, avoid headers when opening log files (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--stderrthreshold severity logs at or above this threshold go to stderr (default 2) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
|
||||
--tls-cert-file string File containing the default x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If HTTPS serving is enabled, and --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory specified by --cert-dir.
|
||||
--tls-cipher-suites strings Comma-separated list of cipher suites for the server. If omitted, the default Go cipher suites will be used.
|
||||
Preferred values: TLS_AES_128_GCM_SHA256, TLS_AES_256_GCM_SHA384, TLS_CHACHA20_POLY1305_SHA256, TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_256_GCM_SHA384.
|
||||
Insecure values: TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_RC4_128_SHA, TLS_RSA_WITH_3DES_EDE_CBC_SHA, TLS_RSA_WITH_AES_128_CBC_SHA256, TLS_RSA_WITH_RC4_128_SHA.
|
||||
--tls-min-version string Minimum TLS version supported. Possible values: VersionTLS10, VersionTLS11, VersionTLS12, VersionTLS13
|
||||
--tls-private-key-file string File containing the default x509 private key matching --tls-cert-file.
|
||||
--tls-sni-cert-key namedCertKey A pair of x509 certificate and private key file paths, optionally suffixed with a list of domain patterns which are fully qualified domain names, possibly with prefixed wildcard segments. The domain patterns also allow IP addresses, but IPs should only be used if the apiserver has visibility to the IP address requested by a client. If no domain patterns are provided, the names of the certificate are extracted. Non-wildcard matches trump over wildcard matches, explicit domain patterns trump over extracted names. For multiple key/certificate pairs, use the --tls-sni-cert-key multiple times. Examples: "example.crt,example.key" or "foo.crt,foo.key:*.foo.com,foo.com". (default [])
|
||||
-v, --v Level number for the log level verbosity
|
||||
--vmodule moduleSpec comma-separated list of pattern=N settings for file-filtered logging
|
||||
--add-dir-header If true, adds the file directory to the header
|
||||
--alsologtostderr log to standard error as well as files
|
||||
--descheduling-interval duration Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.
|
||||
--dry-run execute descheduler in dry run mode.
|
||||
--evict-local-storage-pods Enables evicting pods using local storage by descheduler
|
||||
-h, --help help for descheduler
|
||||
--kubeconfig string File with kube configuration.
|
||||
--log-backtrace-at traceLocation when logging hits line file:N, emit a stack trace (default :0)
|
||||
--log-dir string If non-empty, write log files in this directory
|
||||
--log-file string If non-empty, use this log file
|
||||
--log-file-max-size uint Defines the maximum size a log file can grow to. Unit is megabytes. If the value is 0, the maximum file size is unlimited. (default 1800)
|
||||
--log-flush-frequency duration Maximum number of seconds between log flushes (default 5s)
|
||||
--logtostderr log to standard error instead of files (default true)
|
||||
--max-pods-to-evict-per-node int Limits the maximum number of pods to be evicted per node by descheduler
|
||||
--node-selector string Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)
|
||||
--policy-config-file string File with descheduler policy configuration.
|
||||
--skip-headers If true, avoid header prefixes in the log messages
|
||||
--skip-log-headers If true, avoid headers when opening log files
|
||||
--stderrthreshold severity logs at or above this threshold go to stderr (default 2)
|
||||
-v, --v Level number for the log level verbosity
|
||||
--vmodule moduleSpec comma-separated list of pattern=N settings for file-filtered logging
|
||||
|
||||
Use "descheduler [command] --help" for more information about a command.
|
||||
```
|
||||
@@ -110,65 +71,20 @@ This policy configuration file ensures that pods created more than 7 days ago ar
|
||||
apiVersion: "descheduler/v1alpha1"
|
||||
kind: "DeschedulerPolicy"
|
||||
strategies:
|
||||
"LowNodeUtilization":
|
||||
enabled: false
|
||||
"RemoveDuplicates":
|
||||
enabled: false
|
||||
"RemovePodsViolatingInterPodAntiAffinity":
|
||||
enabled: false
|
||||
"RemovePodsViolatingNodeAffinity":
|
||||
enabled: false
|
||||
"RemovePodsViolatingNodeTaints":
|
||||
enabled: false
|
||||
"RemovePodsHavingTooManyRestarts":
|
||||
enabled: false
|
||||
"PodLifeTime":
|
||||
enabled: true
|
||||
params:
|
||||
podLifeTime:
|
||||
maxPodLifeTimeSeconds: 604800 # pods run for a maximum of 7 days
|
||||
maxPodLifeTimeSeconds: 604800 # pods run for a maximum of 7 days
|
||||
```
|
||||
|
||||
### Balance Cluster By Node Memory Utilization
|
||||
If your cluster has been running for a long period of time, you may find that the resource utilization is not very
|
||||
balanced. The following two strategies can be used to rebalance your cluster based on `cpu`, `memory`
|
||||
or `number of pods`.
|
||||
|
||||
#### Balance high utilization nodes
|
||||
Using `LowNodeUtilization`, descheduler will rebalance the cluster based on memory by evicting pods
|
||||
from nodes with memory utilization over 70% to nodes with memory utilization below 20%.
|
||||
|
||||
```
|
||||
apiVersion: "descheduler/v1alpha1"
|
||||
kind: "DeschedulerPolicy"
|
||||
strategies:
|
||||
"LowNodeUtilization":
|
||||
enabled: true
|
||||
params:
|
||||
nodeResourceUtilizationThresholds:
|
||||
thresholds:
|
||||
"memory": 20
|
||||
targetThresholds:
|
||||
"memory": 70
|
||||
```
|
||||
|
||||
#### Balance low utilization nodes
|
||||
Using `HighNodeUtilization`, descheduler will rebalance the cluster based on memory by evicting pods
|
||||
from nodes with memory utilization lower than 20%. This strategy should be used together with the scheduler's `NodeResourcesFit` plugin and the `MostAllocated` scoring strategy (see the [scheduling configuration docs](https://kubernetes.io/docs/reference/scheduling/config/#scheduling-plugins)).
The evicted pods will be compacted into a minimal set of nodes.
|
||||
|
||||
```
|
||||
apiVersion: "descheduler/v1alpha1"
|
||||
kind: "DeschedulerPolicy"
|
||||
strategies:
|
||||
"HighNodeUtilization":
|
||||
enabled: true
|
||||
params:
|
||||
nodeResourceUtilizationThresholds:
|
||||
thresholds:
|
||||
"memory": 20
|
||||
```
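
Because `HighNodeUtilization` relies on the scheduler packing the evicted pods onto fewer nodes, the kube-scheduler has to score with `MostAllocated`. A minimal sketch of such a scheduler configuration is shown below; the `apiVersion` differs between Kubernetes releases, so adjust it to the one your cluster supports:

```
apiVersion: kubescheduler.config.k8s.io/v1beta3
kind: KubeSchedulerConfiguration
profiles:
- schedulerName: default-scheduler
  pluginConfig:
  - name: NodeResourcesFit
    args:
      scoringStrategy:
        type: MostAllocated
        resources:
        - name: cpu
          weight: 1
        - name: memory
          weight: 1
```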

### Autoheal Node Problems

Descheduler's `RemovePodsViolatingNodeTaints` strategy can be combined with
[Node Problem Detector](https://github.com/kubernetes/node-problem-detector/) and
[Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) to automatically remove
Nodes which have problems. Node Problem Detector can detect specific Node problems and report them to the API server.
The node controller's `TaintNodesByCondition` feature then turns some of these conditions into taints. Currently this
only works for the default node conditions (PIDPressure, MemoryPressure, DiskPressure, Ready) and some cloud provider
specific conditions. The Descheduler will then deschedule workloads from those Nodes. Finally, if the descheduled Node's
resource allocation falls below the Cluster Autoscaler's scale down threshold, the Node will become a scale down candidate
and can be removed by Cluster Autoscaler. These three components form an autohealing cycle for Node problems.

---

**NOTE**

Once [kubernetes/node-problem-detector#565](https://github.com/kubernetes/node-problem-detector/pull/565) is available in NPD, we need to update this section.

---

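For the descheduling step of that cycle, it is enough to enable `RemovePodsViolatingNodeTaints`. The policy below is an illustrative sketch; the `excludedTaints` entries are hypothetical and only show how specific taints can be exempted from triggering eviction:

```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemovePodsViolatingNodeTaints":
    enabled: true
    params:
      excludedTaints:
      - dedicated=special-user # ignore taints with key "dedicated" and value "special-user"
      - reserved               # ignore all taints with key "reserved"
```
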
@@ -1,14 +0,0 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemoveFailedPods":
    enabled: true
    params:
      failedPods:
        reasons:
        - "OutOfcpu"
        - "CreateContainerConfigError"
        includingInitContainers: true
        excludeOwnerKinds:
        - "Job"
        minPodLifetimeSeconds: 3600 # 1 hour

@@ -1,9 +0,0 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "HighNodeUtilization":
    enabled: true
    params:
      nodeResourceUtilizationThresholds:
        thresholds:
          "memory": 20

@@ -1,11 +0,0 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "LowNodeUtilization":
    enabled: true
    params:
      nodeResourceUtilizationThresholds:
        thresholds:
          "memory": 20
        targetThresholds:
          "memory": 70

@@ -1,8 +1,20 @@
---
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "LowNodeUtilization":
    enabled: false
  "RemoveDuplicates":
    enabled: false
  "RemovePodsViolatingInterPodAntiAffinity":
    enabled: false
  "RemovePodsViolatingNodeAffinity":
    enabled: false
  "RemovePodsViolatingNodeTaints":
    enabled: false
  "RemovePodsHavingTooManyRestarts":
    enabled: false
  "PodLifeTime":
    enabled: true
    params:
      podLifeTime:
        maxPodLifeTimeSeconds: 604800 # 7 days
      maxPodLifeTimeSeconds: 604800 # 7 days

@@ -21,9 +21,5 @@ strategies:
    enabled: true
    params:
      podsHavingTooManyRestarts:
        podRestartThreshold: 100
      podRestartThresholds: 100
      includingInitContainers: true
  "RemovePodsViolatingTopologySpreadConstraint":
    enabled: true
    params:
      includeSoftConstraints: true

@@ -1,7 +0,0 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemovePodsViolatingTopologySpreadConstraint":
    enabled: true
    params:
      includeSoftConstraints: true # Include 'ScheduleAnyways' constraints
go.mod
@@ -1,117 +1,14 @@
|
||||
module sigs.k8s.io/descheduler
|
||||
|
||||
go 1.18
|
||||
go 1.13
|
||||
|
||||
require (
|
||||
github.com/client9/misspell v0.3.4
|
||||
github.com/spf13/cobra v1.4.0
|
||||
github.com/spf13/cobra v0.0.5
|
||||
github.com/spf13/pflag v1.0.5
|
||||
k8s.io/api v0.24.0
|
||||
k8s.io/apimachinery v0.24.0
|
||||
k8s.io/apiserver v0.24.0
|
||||
k8s.io/client-go v0.24.0
|
||||
k8s.io/code-generator v0.24.0
|
||||
k8s.io/component-base v0.24.0
|
||||
k8s.io/component-helpers v0.24.0
|
||||
k8s.io/klog/v2 v2.60.1
|
||||
k8s.io/kubectl v0.20.5
|
||||
sigs.k8s.io/mdtoc v1.0.1
|
||||
)
|
||||
|
||||
require (
|
||||
cloud.google.com/go v0.81.0 // indirect
|
||||
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
|
||||
github.com/Azure/go-autorest/autorest v0.11.18 // indirect
|
||||
github.com/Azure/go-autorest/autorest/adal v0.9.13 // indirect
|
||||
github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect
|
||||
github.com/Azure/go-autorest/logger v0.2.1 // indirect
|
||||
github.com/Azure/go-autorest/tracing v0.6.0 // indirect
|
||||
github.com/BurntSushi/toml v0.3.1 // indirect
|
||||
github.com/NYTimes/gziphandler v1.1.1 // indirect
|
||||
github.com/PuerkitoBio/purell v1.1.1 // indirect
|
||||
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/blang/semver/v4 v4.0.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.1.2 // indirect
|
||||
github.com/coreos/go-semver v0.3.0 // indirect
|
||||
github.com/coreos/go-systemd/v22 v22.3.2 // indirect
|
||||
github.com/davecgh/go-spew v1.1.1 // indirect
|
||||
github.com/emicklei/go-restful v2.9.5+incompatible // indirect
|
||||
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
|
||||
github.com/felixge/httpsnoop v1.0.1 // indirect
|
||||
github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect
|
||||
github.com/fsnotify/fsnotify v1.4.9 // indirect
|
||||
github.com/go-logr/logr v1.2.0 // indirect
|
||||
github.com/go-logr/zapr v1.2.0 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.19.5 // indirect
|
||||
github.com/go-openapi/jsonreference v0.19.5 // indirect
|
||||
github.com/go-openapi/swag v0.19.14 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
|
||||
github.com/golang/protobuf v1.5.2 // indirect
|
||||
github.com/gomarkdown/markdown v0.0.0-20200824053859-8c8b3816f167 // indirect
|
||||
github.com/google/gnostic v0.5.7-v3refs // indirect
|
||||
github.com/google/go-cmp v0.5.5 // indirect
|
||||
github.com/google/gofuzz v1.1.0 // indirect
|
||||
github.com/google/uuid v1.1.2 // indirect
|
||||
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
|
||||
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
|
||||
github.com/imdario/mergo v0.3.5 // indirect
|
||||
github.com/inconshreveable/mousetrap v1.0.0 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/mailru/easyjson v0.7.6 // indirect
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
|
||||
github.com/mmarkdown/mmark v2.0.40+incompatible // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/prometheus/client_golang v1.12.1 // indirect
|
||||
github.com/prometheus/client_model v0.2.0 // indirect
|
||||
github.com/prometheus/common v0.32.1 // indirect
|
||||
github.com/prometheus/procfs v0.7.3 // indirect
|
||||
go.etcd.io/etcd/api/v3 v3.5.1 // indirect
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.5.1 // indirect
|
||||
go.etcd.io/etcd/client/v3 v3.5.1 // indirect
|
||||
go.opentelemetry.io/contrib v0.20.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0 // indirect
|
||||
go.opentelemetry.io/otel v0.20.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp v0.20.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v0.20.0 // indirect
|
||||
go.opentelemetry.io/otel/sdk v0.20.0 // indirect
|
||||
go.opentelemetry.io/otel/sdk/export/metric v0.20.0 // indirect
|
||||
go.opentelemetry.io/otel/sdk/metric v0.20.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v0.20.0 // indirect
|
||||
go.opentelemetry.io/proto/otlp v0.7.0 // indirect
|
||||
go.uber.org/atomic v1.7.0 // indirect
|
||||
go.uber.org/multierr v1.6.0 // indirect
|
||||
go.uber.org/zap v1.19.0 // indirect
|
||||
golang.org/x/crypto v0.0.0-20220518034528-6f7dac969898 // indirect
|
||||
golang.org/x/mod v0.6.0-dev.0.20220106191415-9b9b3d81d5e3 // indirect
|
||||
golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect
|
||||
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
|
||||
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
|
||||
golang.org/x/sys v0.0.0-20220209214540-3681064d5158 // indirect
|
||||
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
|
||||
golang.org/x/text v0.3.7 // indirect
|
||||
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
|
||||
golang.org/x/tools v0.1.10-0.20220218145154-897bd77cd717 // indirect
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
|
||||
google.golang.org/appengine v1.6.7 // indirect
|
||||
google.golang.org/genproto v0.0.0-20220107163113-42d7afdf6368 // indirect
|
||||
google.golang.org/grpc v1.40.0 // indirect
|
||||
google.golang.org/protobuf v1.27.1 // indirect
|
||||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
|
||||
k8s.io/gengo v0.0.0-20211129171323-c02415ce4185 // indirect
|
||||
k8s.io/kube-openapi v0.0.0-20220328201542-3ee0da9b0b42 // indirect
|
||||
k8s.io/utils v0.0.0-20220210201930-3a6ce19ff2f9 // indirect
|
||||
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.30 // indirect
|
||||
sigs.k8s.io/json v0.0.0-20211208200746-9f7c6b3444d2 // indirect
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.2.1 // indirect
|
||||
sigs.k8s.io/yaml v1.2.0 // indirect
|
||||
k8s.io/api v0.18.2
|
||||
k8s.io/apimachinery v0.18.2
|
||||
k8s.io/apiserver v0.18.2
|
||||
k8s.io/client-go v0.18.2
|
||||
k8s.io/component-base v0.18.2
|
||||
k8s.io/klog v1.0.0
|
||||
)
|
||||
|
||||
@@ -1,6 +0,0 @@
|
||||
BUILD
|
||||
CHANGELOG
|
||||
OWNERS
|
||||
go.mod
|
||||
go.sum
|
||||
vendor/
|
||||
@@ -18,15 +18,15 @@ E2E_GCE_HOME=$DESCHEDULER_ROOT/hack/e2e-gce
|
||||
|
||||
create_cluster() {
|
||||
echo "#################### Creating instances ##########################"
|
||||
gcloud compute instances create descheduler-$master_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
|
||||
gcloud compute instances create descheduler-$master_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
|
||||
# Keeping the --zone here so as to make sure that e2e's can run locally.
|
||||
echo "gcloud compute instances delete descheduler-$master_uuid --zone=us-east1-b --quiet" > $E2E_GCE_HOME/delete_cluster.sh
|
||||
|
||||
gcloud compute instances create descheduler-$node1_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
|
||||
|
||||
gcloud compute instances create descheduler-$node1_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
|
||||
echo "gcloud compute instances delete descheduler-$node1_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
|
||||
|
||||
gcloud compute instances create descheduler-$node2_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
|
||||
echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-c --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
|
||||
gcloud compute instances create descheduler-$node2_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
|
||||
echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
|
||||
|
||||
# Delete the firewall port created for master.
|
||||
echo "gcloud compute firewall-rules delete kubeapiserver-$master_uuid --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
|
||||
@@ -44,10 +44,10 @@ generate_kubeadm_instance_files() {
|
||||
|
||||
|
||||
transfer_install_files() {
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_install.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b
|
||||
}
|
||||
|
||||
|
||||
@@ -55,19 +55,19 @@ install_kube() {
|
||||
# Docker installation.
|
||||
gcloud compute ssh descheduler-$master_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
|
||||
gcloud compute ssh descheduler-$node1_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
|
||||
gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-c
|
||||
gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
|
||||
# kubeadm installation.
|
||||
# 1. Transfer files to master, nodes.
|
||||
transfer_install_files
|
||||
# 2. Install kubeadm.
|
||||
#TODO: Add rm /tmp/kubeadm_install.sh
|
||||
# Open port for kube API server
|
||||
gcloud compute firewall-rules create kubeapiserver-$master_uuid --allow tcp:6443 --source-tags=descheduler-$master_uuid --source-ranges=0.0.0.0/0 --description="Opening api server port"
|
||||
gcloud compute firewall-rules create kubeapiserver-$master_uuid --allow tcp:6443 --source-tags=descheduler-$master_uuid --source-ranges=0.0.0.0/0 --description="Opening api server port"
|
||||
|
||||
gcloud compute ssh descheduler-$master_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
|
||||
kubeadm_join_command=$(gcloud compute ssh descheduler-$master_uuid --command "sudo chmod 755 /tmp/kubeadm_install.sh; sudo /tmp/kubeadm_install.sh" --zone=us-east1-b|grep 'kubeadm join')
|
||||
|
||||
# Copy the kubeconfig file onto /tmp for e2e tests.
|
||||
# Copy the kubeconfig file onto /tmp for e2e tests.
|
||||
gcloud compute ssh descheduler-$master_uuid --command "sudo cp /etc/kubernetes/admin.conf /tmp; sudo chmod 777 /tmp/admin.conf" --zone=us-east1-b
|
||||
gcloud compute scp descheduler-$master_uuid:/tmp/admin.conf /tmp/admin.conf --zone=us-east1-b
|
||||
|
||||
@@ -75,15 +75,16 @@ install_kube() {
|
||||
gcloud compute ssh descheduler-$master_uuid --command "sudo kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter.yaml --kubeconfig /etc/kubernetes/admin.conf" --zone=us-east1-b
|
||||
echo $kubeadm_join_command > $E2E_GCE_HOME/kubeadm_join.sh
|
||||
|
||||
# Copy kubeadm_join to every node.
|
||||
# Copy kubeadm_join to every node.
|
||||
#TODO: Put these in a loop, so that extension becomes possible.
|
||||
gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
|
||||
gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b
|
||||
|
||||
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-c
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c
|
||||
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-c
|
||||
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
|
||||
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b
|
||||
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -1,18 +1,6 @@
|
||||
kind: Cluster
|
||||
apiVersion: kind.x-k8s.io/v1alpha4
|
||||
apiVersion: kind.sigs.k8s.io/v1alpha3
|
||||
nodes:
|
||||
- role: control-plane
|
||||
- role: worker
|
||||
kubeadmConfigPatches:
|
||||
- |
|
||||
kind: JoinConfiguration
|
||||
nodeRegistration:
|
||||
kubeletExtraArgs:
|
||||
node-labels: "topology.kubernetes.io/zone=local-a"
|
||||
- role: worker
|
||||
kubeadmConfigPatches:
|
||||
- |
|
||||
kind: JoinConfiguration
|
||||
nodeRegistration:
|
||||
kubeletExtraArgs:
|
||||
node-labels: "topology.kubernetes.io/zone=local-b"
|
||||
|
||||
@@ -1,27 +0,0 @@
|
||||
//go:build tools
|
||||
// +build tools
|
||||
|
||||
/*
|
||||
Copyright 2019 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
// This package imports things required by build scripts, to force `go mod` to see them as dependencies
|
||||
package tools
|
||||
|
||||
import (
|
||||
_ "github.com/client9/misspell/cmd/misspell"
|
||||
_ "k8s.io/code-generator"
|
||||
_ "sigs.k8s.io/mdtoc"
|
||||
)
|
||||
@@ -23,7 +23,7 @@ DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
|
||||
|
||||
GO_VERSION=($(go version))
|
||||
|
||||
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.17|go1.18') ]]; then
|
||||
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9|go1.10|go1.11|go1.12|go1.13') ]]; then
|
||||
echo "Unknown go version '${GO_VERSION[2]}', skipping gofmt."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -1,25 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2021 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
|
||||
|
||||
go build -o "${OS_OUTPUT_BINPATH}/mdtoc" "sigs.k8s.io/mdtoc"
|
||||
|
||||
${OS_OUTPUT_BINPATH}/mdtoc --inplace README.md
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
|
||||
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
|
||||
mkdir -p "${DESCHEDULER_ROOT}/_tmp"
|
||||
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
|
||||
|
||||
_deschedulertmp="${_tmpdir}"
|
||||
mkdir -p "${_deschedulertmp}"
|
||||
|
||||
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
|
||||
_deschedulertmp="${_deschedulertmp}/descheduler"
|
||||
|
||||
pushd "${_deschedulertmp}" > /dev/null 2>&1
|
||||
go build -o "${OS_OUTPUT_BINPATH}/conversion-gen" "k8s.io/code-generator/cmd/conversion-gen"
|
||||
|
||||
${OS_OUTPUT_BINPATH}/conversion-gen \
|
||||
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
|
||||
--input-dirs "./pkg/apis/componentconfig/v1alpha1,./pkg/api/v1alpha1" \
|
||||
--output-file-base zz_generated.conversion
|
||||
popd > /dev/null 2>&1
|
||||
|
||||
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
|
||||
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
|
||||
echo "Generated output differs:" >&2
|
||||
echo "${_out}" >&2
|
||||
echo "Generated conversions verify failed. Please run ./hack/update-generated-conversions.sh"
|
||||
exit 1
|
||||
fi
|
||||
popd > /dev/null 2>&1
|
||||
|
||||
echo "Generated conversions verified."
|
||||
@@ -1,36 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
|
||||
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
|
||||
mkdir -p "${DESCHEDULER_ROOT}/_tmp"
|
||||
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
|
||||
|
||||
_deschedulertmp="${_tmpdir}"
|
||||
mkdir -p "${_deschedulertmp}"
|
||||
|
||||
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
|
||||
_deschedulertmp="${_deschedulertmp}/descheduler"
|
||||
|
||||
pushd "${_deschedulertmp}" > /dev/null 2>&1
|
||||
go build -o "${OS_OUTPUT_BINPATH}/deepcopy-gen" "k8s.io/code-generator/cmd/deepcopy-gen"
|
||||
|
||||
${OS_OUTPUT_BINPATH}/deepcopy-gen \
|
||||
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
|
||||
--input-dirs "./pkg/apis/componentconfig,./pkg/apis/componentconfig/v1alpha1,./pkg/api,./pkg/api/v1alpha1" \
|
||||
--output-file-base zz_generated.deepcopy
|
||||
popd > /dev/null 2>&1
|
||||
|
||||
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
|
||||
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
|
||||
echo "Generated deep-copies output differs:" >&2
|
||||
echo "${_out}" >&2
|
||||
echo "Generated deep-copies verify failed. Please run ./hack/update-generated-deep-copies.sh"
|
||||
exit 1
|
||||
fi
|
||||
popd > /dev/null 2>&1
|
||||
|
||||
echo "Generated deep-copies verified."
|
||||
@@ -1,35 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
|
||||
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
|
||||
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
|
||||
|
||||
_deschedulertmp="${_tmpdir}"
|
||||
mkdir -p "${_deschedulertmp}"
|
||||
|
||||
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
|
||||
_deschedulertmp="${_deschedulertmp}/descheduler"
|
||||
|
||||
pushd "${_deschedulertmp}" > /dev/null 2>&1
|
||||
go build -o "${OS_OUTPUT_BINPATH}/defaulter-gen" "k8s.io/code-generator/cmd/defaulter-gen"
|
||||
|
||||
${OS_OUTPUT_BINPATH}/defaulter-gen \
|
||||
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
|
||||
--input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \
|
||||
--extra-peer-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \
|
||||
--output-file-base zz_generated.defaults
|
||||
popd > /dev/null 2>&1
|
||||
|
||||
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
|
||||
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
|
||||
echo "Generated defaulters output differs:" >&2
|
||||
echo "${_out}" >&2
|
||||
echo "Generated defaulters verify failed. Please run ./hack/update-generated-defaulters.sh"
|
||||
fi
|
||||
popd > /dev/null 2>&1
|
||||
|
||||
echo "Generated Defaulters verified."
|
||||
@@ -23,7 +23,7 @@ DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
|
||||
|
||||
GO_VERSION=($(go version))
|
||||
|
||||
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.17|go1.18') ]]; then
|
||||
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9|go1.10|go1.11|go1.12|go1.13') ]]; then
|
||||
echo "Unknown go version '${GO_VERSION[2]}', skipping gofmt."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2021 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
|
||||
|
||||
go vet ${OS_ROOT}/...
|
||||
@@ -1,41 +0,0 @@
|
||||
#!/usr/bin/env bash
|
||||
# Copyright 2018 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
# This script checks commonly misspelled English words in all files in the
|
||||
# working directory by client9/misspell package.
|
||||
# Usage: `hack/verify-spelling.sh`.
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
|
||||
export KUBE_ROOT
|
||||
source "${KUBE_ROOT}/hack/lib/init.sh"
|
||||
|
||||
# Ensure that we find the binaries we build before anything else.
|
||||
export GOBIN="${OS_OUTPUT_BINPATH}"
|
||||
PATH="${GOBIN}:${PATH}"
|
||||
|
||||
# Install tools we need
|
||||
pushd "${KUBE_ROOT}" >/dev/null
|
||||
GO111MODULE=on go install github.com/client9/misspell/cmd/misspell
|
||||
popd >/dev/null
|
||||
|
||||
# Spell checking
|
||||
# All the skipping files are defined in hack/.spelling_failures
|
||||
skipping_file="${KUBE_ROOT}/hack/.spelling_failures"
|
||||
failing_packages=$(sed "s| | -e |g" "${skipping_file}")
|
||||
git ls-files | grep -v -e "${failing_packages}" | xargs misspell -i "Creater,creater,ect" -error -o stderr
|
||||
@@ -1,29 +0,0 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Copyright 2021 The Kubernetes Authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
set -o errexit
|
||||
set -o nounset
|
||||
set -o pipefail
|
||||
|
||||
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
|
||||
|
||||
go build -o "${OS_OUTPUT_BINPATH}/mdtoc" "sigs.k8s.io/mdtoc"
|
||||
|
||||
if ! ${OS_OUTPUT_BINPATH}/mdtoc --inplace --dryrun README.md
|
||||
then
|
||||
echo "ERROR: Changes detected to table of contents. Run ./hack/update-toc.sh" >&2
|
||||
exit 1
|
||||
fi
|
||||
@@ -1,6 +0,0 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- configmap.yaml
|
||||
- rbac.yaml
|
||||
@@ -1,5 +1,5 @@
|
||||
---
|
||||
apiVersion: batch/v1 # for k8s version < 1.21.0, use batch/v1beta1
|
||||
apiVersion: batch/v1beta1
|
||||
kind: CronJob
|
||||
metadata:
|
||||
name: descheduler-cronjob
|
||||
@@ -16,7 +16,7 @@ spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
containers:
|
||||
- name: descheduler
|
||||
image: k8s.gcr.io/descheduler/descheduler:v0.24.1
|
||||
image: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler:v0.18.0
|
||||
volumeMounts:
|
||||
- mountPath: /policy-dir
|
||||
name: policy-volume
|
||||
@@ -27,26 +27,6 @@ spec:
|
||||
- "/policy-dir/policy.yaml"
|
||||
- "--v"
|
||||
- "3"
|
||||
resources:
|
||||
requests:
|
||||
cpu: "500m"
|
||||
memory: "256Mi"
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10258
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
privileged: false
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
restartPolicy: "Never"
|
||||
serviceAccountName: descheduler-sa
|
||||
volumes:
|
||||
@@ -1,6 +0,0 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- ../base
|
||||
- cronjob.yaml
|
||||
@@ -1,62 +0,0 @@
|
||||
apiVersion: apps/v1
|
||||
kind: Deployment
|
||||
metadata:
|
||||
name: descheduler
|
||||
namespace: kube-system
|
||||
labels:
|
||||
app: descheduler
|
||||
spec:
|
||||
replicas: 1
|
||||
selector:
|
||||
matchLabels:
|
||||
app: descheduler
|
||||
template:
|
||||
metadata:
|
||||
labels:
|
||||
app: descheduler
|
||||
spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
serviceAccountName: descheduler-sa
|
||||
containers:
|
||||
- name: descheduler
|
||||
image: k8s.gcr.io/descheduler/descheduler:v0.24.1
|
||||
imagePullPolicy: IfNotPresent
|
||||
command:
|
||||
- "/bin/descheduler"
|
||||
args:
|
||||
- "--policy-config-file"
|
||||
- "/policy-dir/policy.yaml"
|
||||
- "--descheduling-interval"
|
||||
- "5m"
|
||||
- "--v"
|
||||
- "3"
|
||||
ports:
|
||||
- containerPort: 10258
|
||||
protocol: TCP
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10258
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
resources:
|
||||
requests:
|
||||
cpu: 500m
|
||||
memory: 256Mi
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
privileged: false
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
volumeMounts:
|
||||
- mountPath: /policy-dir
|
||||
name: policy-volume
|
||||
volumes:
|
||||
- name: policy-volume
|
||||
configMap:
|
||||
name: descheduler-policy-configmap
|
||||
@@ -1,6 +0,0 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- ../base
|
||||
- deployment.yaml
|
||||
@@ -14,7 +14,7 @@ spec:
|
||||
priorityClassName: system-cluster-critical
|
||||
containers:
|
||||
- name: descheduler
|
||||
image: k8s.gcr.io/descheduler/descheduler:v0.24.1
|
||||
image: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler:v0.18.0
|
||||
volumeMounts:
|
||||
- mountPath: /policy-dir
|
||||
name: policy-volume
|
||||
@@ -25,26 +25,6 @@ spec:
|
||||
- "/policy-dir/policy.yaml"
|
||||
- "--v"
|
||||
- "3"
|
||||
resources:
|
||||
requests:
|
||||
cpu: "500m"
|
||||
memory: "256Mi"
|
||||
livenessProbe:
|
||||
failureThreshold: 3
|
||||
httpGet:
|
||||
path: /healthz
|
||||
port: 10258
|
||||
scheme: HTTPS
|
||||
initialDelaySeconds: 3
|
||||
periodSeconds: 10
|
||||
securityContext:
|
||||
allowPrivilegeEscalation: false
|
||||
capabilities:
|
||||
drop:
|
||||
- ALL
|
||||
privileged: false
|
||||
readOnlyRootFilesystem: true
|
||||
runAsNonRoot: true
|
||||
restartPolicy: "Never"
|
||||
serviceAccountName: descheduler-sa
|
||||
volumes:
|
||||
@@ -1,6 +0,0 @@
|
||||
apiVersion: kustomize.config.k8s.io/v1beta1
|
||||
kind: Kustomization
|
||||
|
||||
resources:
|
||||
- ../base
|
||||
- job.yaml
|
||||
@@ -3,6 +3,7 @@ kind: ClusterRole
|
||||
apiVersion: rbac.authorization.k8s.io/v1
|
||||
metadata:
|
||||
name: descheduler-cluster-role
|
||||
namespace: kube-system
|
||||
rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["events"]
|
||||
@@ -10,25 +11,12 @@ rules:
|
||||
- apiGroups: [""]
|
||||
resources: ["nodes"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: [""]
|
||||
resources: ["namespaces"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods"]
|
||||
verbs: ["get", "watch", "list", "delete"]
|
||||
- apiGroups: [""]
|
||||
resources: ["pods/eviction"]
|
||||
verbs: ["create"]
|
||||
- apiGroups: ["scheduling.k8s.io"]
|
||||
resources: ["priorityclasses"]
|
||||
verbs: ["get", "watch", "list"]
|
||||
- apiGroups: ["coordination.k8s.io"]
|
||||
resources: ["leases"]
|
||||
verbs: ["create"]
|
||||
- apiGroups: ["coordination.k8s.io"]
|
||||
resources: ["leases"]
|
||||
resourceNames: ["descheduler"]
|
||||
verbs: ["get", "patch", "delete"]
|
||||
---
|
||||
apiVersion: v1
|
||||
kind: ServiceAccount
|
||||
@@ -40,6 +28,7 @@ apiVersion: rbac.authorization.k8s.io/v1
|
||||
kind: ClusterRoleBinding
|
||||
metadata:
|
||||
name: descheduler-cluster-role-binding
|
||||
namespace: kube-system
|
||||
roleRef:
|
||||
apiGroup: rbac.authorization.k8s.io
|
||||
kind: ClusterRole
|
||||
@@ -48,3 +37,4 @@ subjects:
|
||||
- name: descheduler-sa
|
||||
kind: ServiceAccount
|
||||
namespace: kube-system
|
||||
|
||||
@@ -1,72 +0,0 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"sync"
|
||||
|
||||
"k8s.io/component-base/metrics"
|
||||
"k8s.io/component-base/metrics/legacyregistry"
|
||||
"sigs.k8s.io/descheduler/pkg/version"
|
||||
)
|
||||
|
||||
const (
|
||||
// DeschedulerSubsystem - subsystem name used by descheduler
|
||||
DeschedulerSubsystem = "descheduler"
|
||||
)
|
||||
|
||||
var (
|
||||
PodsEvicted = metrics.NewCounterVec(
|
||||
&metrics.CounterOpts{
|
||||
Subsystem: DeschedulerSubsystem,
|
||||
Name: "pods_evicted",
|
||||
Help: "Number of evicted pods, by the result, by the strategy, by the namespace, by the node name. 'error' result means a pod could not be evicted",
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
}, []string{"result", "strategy", "namespace", "node"})
|
||||
|
||||
buildInfo = metrics.NewGauge(
|
||||
&metrics.GaugeOpts{
|
||||
Subsystem: DeschedulerSubsystem,
|
||||
Name: "build_info",
|
||||
Help: "Build info about descheduler, including Go version, Descheduler version, Git SHA, Git branch",
|
||||
ConstLabels: map[string]string{"GoVersion": version.Get().GoVersion, "DeschedulerVersion": version.Get().GitVersion, "GitBranch": version.Get().GitBranch, "GitSha1": version.Get().GitSha1},
|
||||
StabilityLevel: metrics.ALPHA,
|
||||
},
|
||||
)
|
||||
|
||||
metricsList = []metrics.Registerable{
|
||||
PodsEvicted,
|
||||
buildInfo,
|
||||
}
|
||||
)
|
||||
|
||||
var registerMetrics sync.Once
|
||||
|
||||
// Register all metrics.
|
||||
func Register() {
|
||||
// Register the metrics.
|
||||
registerMetrics.Do(func() {
|
||||
RegisterMetrics(metricsList...)
|
||||
})
|
||||
}
|
||||
|
||||
// RegisterMetrics registers a list of metrics.
|
||||
func RegisterMetrics(extraMetrics ...metrics.Registerable) {
|
||||
for _, metric := range extraMetrics {
|
||||
legacyregistry.MustRegister(metric)
|
||||
}
|
||||
}
|
||||
@@ -19,6 +19,8 @@ package api
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/scheme"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -33,6 +35,12 @@ const GroupName = "descheduler"
|
||||
// SchemeGroupVersion is group version used to register these objects
|
||||
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
|
||||
|
||||
func init() {
|
||||
if err := addKnownTypes(scheme.Scheme); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Kind takes an unqualified kind and returns a Group qualified GroupKind
|
||||
func Kind(kind string) schema.GroupKind {
|
||||
return SchemeGroupVersion.WithKind(kind).GroupKind()
|
||||
|
||||
@@ -28,27 +28,6 @@ type DeschedulerPolicy struct {
|
||||
|
||||
// Strategies
|
||||
Strategies StrategyList
|
||||
|
||||
// NodeSelector for a set of nodes to operate over
|
||||
NodeSelector *string
|
||||
|
||||
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
|
||||
EvictFailedBarePods *bool
|
||||
|
||||
// EvictLocalStoragePods allows pods using local storage to be evicted.
|
||||
EvictLocalStoragePods *bool
|
||||
|
||||
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
|
||||
EvictSystemCriticalPods *bool
|
||||
|
||||
// IgnorePVCPods prevents pods with PVCs from being evicted.
|
||||
IgnorePVCPods *bool
|
||||
|
||||
// MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node.
|
||||
MaxNoOfPodsToEvictPerNode *uint
|
||||
|
||||
// MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace.
|
||||
MaxNoOfPodsToEvictPerNamespace *uint
|
||||
}
|
||||
|
||||
type StrategyName string
|
||||
@@ -62,44 +41,25 @@ type DeschedulerStrategy struct {
|
||||
Weight int
|
||||
|
||||
// Strategy parameters
|
||||
Params *StrategyParameters
|
||||
Params StrategyParameters
|
||||
}
|
||||
|
||||
// Namespaces carries a list of included/excluded namespaces
|
||||
// for which a given strategy is applicable
|
||||
type Namespaces struct {
|
||||
Include []string
|
||||
Exclude []string
|
||||
}
|
||||
|
||||
// Besides Namespaces only one of its members may be specified
|
||||
// TODO(jchaloup): move Namespaces ThresholdPriority and ThresholdPriorityClassName to individual strategies
|
||||
// once the policy version is bumped to v1alpha2
|
||||
// Only one of its members may be specified
|
||||
type StrategyParameters struct {
|
||||
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds
|
||||
NodeAffinityType []string
|
||||
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts
|
||||
PodLifeTime *PodLifeTime
|
||||
MaxPodLifeTimeSeconds *uint
|
||||
RemoveDuplicates *RemoveDuplicates
|
||||
FailedPods *FailedPods
|
||||
IncludeSoftConstraints bool
|
||||
Namespaces *Namespaces
|
||||
ThresholdPriority *int32
|
||||
ThresholdPriorityClassName string
|
||||
LabelSelector *metav1.LabelSelector
|
||||
NodeFit bool
|
||||
IncludePreferNoSchedule bool
|
||||
ExcludedTaints []string
|
||||
}
|
||||
|
||||
type Percentage float64
|
||||
type ResourceThresholds map[v1.ResourceName]Percentage
|
||||
|
||||
type NodeResourceUtilizationThresholds struct {
|
||||
UseDeviationThresholds bool
|
||||
Thresholds ResourceThresholds
|
||||
TargetThresholds ResourceThresholds
|
||||
NumberOfNodes int
|
||||
Thresholds ResourceThresholds
|
||||
TargetThresholds ResourceThresholds
|
||||
NumberOfNodes int
|
||||
}
|
||||
|
||||
type PodsHavingTooManyRestarts struct {
|
||||
@@ -110,15 +70,3 @@ type PodsHavingTooManyRestarts struct {
|
||||
type RemoveDuplicates struct {
|
||||
ExcludeOwnerKinds []string
|
||||
}
|
||||
|
||||
type PodLifeTime struct {
|
||||
MaxPodLifeTimeSeconds *uint
|
||||
PodStatusPhases []string
|
||||
}
|
||||
|
||||
type FailedPods struct {
|
||||
ExcludeOwnerKinds []string
|
||||
MinPodLifetimeSeconds *uint
|
||||
Reasons []string
|
||||
IncludingInitContainers bool
|
||||
}
|
||||
|
||||
@@ -28,27 +28,6 @@ type DeschedulerPolicy struct {
|
||||
|
||||
// Strategies
|
||||
Strategies StrategyList `json:"strategies,omitempty"`
|
||||
|
||||
// NodeSelector for a set of nodes to operate over
|
||||
NodeSelector *string `json:"nodeSelector,omitempty"`
|
||||
|
||||
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
|
||||
EvictFailedBarePods *bool `json:"evictFailedBarePods,omitempty"`
|
||||
|
||||
// EvictLocalStoragePods allows pods using local storage to be evicted.
|
||||
EvictLocalStoragePods *bool `json:"evictLocalStoragePods,omitempty"`
|
||||
|
||||
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
|
||||
EvictSystemCriticalPods *bool `json:"evictSystemCriticalPods,omitempty"`
|
||||
|
||||
// IgnorePVCPods prevents pods with PVCs from being evicted.
|
||||
IgnorePVCPods *bool `json:"ignorePvcPods,omitempty"`
|
||||
|
||||
// MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node.
|
||||
MaxNoOfPodsToEvictPerNode *int `json:"maxNoOfPodsToEvictPerNode,omitempty"`
|
||||
|
||||
// MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace.
|
||||
MaxNoOfPodsToEvictPerNamespace *int `json:"maxNoOfPodsToEvictPerNamespace,omitempty"`
|
||||
}
|
||||
|
||||
type StrategyName string
|
||||
@@ -62,42 +41,25 @@ type DeschedulerStrategy struct {
|
||||
Weight int `json:"weight,omitempty"`
|
||||
|
||||
// Strategy parameters
|
||||
Params *StrategyParameters `json:"params,omitempty"`
|
||||
Params StrategyParameters `json:"params,omitempty"`
|
||||
}
|
||||
|
||||
// Namespaces carries a list of included/excluded namespaces
|
||||
// for which a given strategy is applicable.
|
||||
type Namespaces struct {
|
||||
Include []string `json:"include"`
|
||||
Exclude []string `json:"exclude"`
|
||||
}
|
||||
|
||||
// Besides Namespaces ThresholdPriority and ThresholdPriorityClassName only one of its members may be specified
|
||||
// Only one of its members may be specified
|
||||
type StrategyParameters struct {
|
||||
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"`
|
||||
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
|
||||
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"`
|
||||
PodLifeTime *PodLifeTime `json:"podLifeTime,omitempty"`
|
||||
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
|
||||
RemoveDuplicates *RemoveDuplicates `json:"removeDuplicates,omitempty"`
|
||||
FailedPods *FailedPods `json:"failedPods,omitempty"`
|
||||
IncludeSoftConstraints bool `json:"includeSoftConstraints"`
|
||||
Namespaces *Namespaces `json:"namespaces"`
|
||||
ThresholdPriority *int32 `json:"thresholdPriority"`
|
||||
ThresholdPriorityClassName string `json:"thresholdPriorityClassName"`
|
||||
LabelSelector *metav1.LabelSelector `json:"labelSelector"`
|
||||
NodeFit bool `json:"nodeFit"`
|
||||
IncludePreferNoSchedule bool `json:"includePreferNoSchedule"`
|
||||
ExcludedTaints []string `json:"excludedTaints,omitempty"`
|
||||
}
|
||||
|
||||
type Percentage float64
|
||||
type ResourceThresholds map[v1.ResourceName]Percentage
|
||||
|
||||
type NodeResourceUtilizationThresholds struct {
|
||||
UseDeviationThresholds bool `json:"useDeviationThresholds,omitempty"`
|
||||
Thresholds ResourceThresholds `json:"thresholds,omitempty"`
|
||||
TargetThresholds ResourceThresholds `json:"targetThresholds,omitempty"`
|
||||
NumberOfNodes int `json:"numberOfNodes,omitempty"`
|
||||
Thresholds ResourceThresholds `json:"thresholds,omitempty"`
|
||||
TargetThresholds ResourceThresholds `json:"targetThresholds,omitempty"`
|
||||
NumberOfNodes int `json:"numberOfNodes,omitempty"`
|
||||
}
|
||||
|
||||
type PodsHavingTooManyRestarts struct {
|
||||
@@ -108,15 +70,3 @@ type PodsHavingTooManyRestarts struct {
|
||||
type RemoveDuplicates struct {
|
||||
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
|
||||
}
|
||||
|
||||
type PodLifeTime struct {
|
||||
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
|
||||
PodStatusPhases []string `json:"podStatusPhases,omitempty"`
|
||||
}
|
||||
|
||||
type FailedPods struct {
|
||||
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
|
||||
MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds,omitempty"`
|
||||
Reasons []string `json:"reasons,omitempty"`
|
||||
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
|
||||
}
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -24,7 +23,6 @@ package v1alpha1
|
||||
import (
|
||||
unsafe "unsafe"
|
||||
|
||||
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
conversion "k8s.io/apimachinery/pkg/conversion"
|
||||
runtime "k8s.io/apimachinery/pkg/runtime"
|
||||
api "sigs.k8s.io/descheduler/pkg/api"
|
||||
@@ -57,26 +55,6 @@ func RegisterConversions(s *runtime.Scheme) error {
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*FailedPods)(nil), (*api.FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1alpha1_FailedPods_To_api_FailedPods(a.(*FailedPods), b.(*api.FailedPods), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*api.FailedPods)(nil), (*FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_api_FailedPods_To_v1alpha1_FailedPods(a.(*api.FailedPods), b.(*FailedPods), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*Namespaces)(nil), (*api.Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1alpha1_Namespaces_To_api_Namespaces(a.(*Namespaces), b.(*api.Namespaces), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*api.Namespaces)(nil), (*Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_api_Namespaces_To_v1alpha1_Namespaces(a.(*api.Namespaces), b.(*Namespaces), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*NodeResourceUtilizationThresholds)(nil), (*api.NodeResourceUtilizationThresholds)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(a.(*NodeResourceUtilizationThresholds), b.(*api.NodeResourceUtilizationThresholds), scope)
|
||||
}); err != nil {
|
||||
@@ -87,16 +65,6 @@ func RegisterConversions(s *runtime.Scheme) error {
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*PodLifeTime)(nil), (*api.PodLifeTime)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime(a.(*PodLifeTime), b.(*api.PodLifeTime), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*api.PodLifeTime)(nil), (*PodLifeTime)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime(a.(*api.PodLifeTime), b.(*PodLifeTime), scope)
|
||||
}); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := s.AddGeneratedConversionFunc((*PodsHavingTooManyRestarts)(nil), (*api.PodsHavingTooManyRestarts)(nil), func(a, b interface{}, scope conversion.Scope) error {
|
||||
return Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(a.(*PodsHavingTooManyRestarts), b.(*api.PodsHavingTooManyRestarts), scope)
|
||||
}); err != nil {
|
||||
@@ -132,25 +100,6 @@ func RegisterConversions(s *runtime.Scheme) error {
|
||||
|
||||
func autoConvert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *DeschedulerPolicy, out *api.DeschedulerPolicy, s conversion.Scope) error {
|
||||
out.Strategies = *(*api.StrategyList)(unsafe.Pointer(&in.Strategies))
|
||||
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
|
||||
out.EvictFailedBarePods = (*bool)(unsafe.Pointer(in.EvictFailedBarePods))
|
||||
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
|
||||
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
|
||||
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
|
||||
if in.MaxNoOfPodsToEvictPerNode != nil {
|
||||
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
|
||||
*out = new(uint)
|
||||
**out = uint(**in)
|
||||
} else {
|
||||
out.MaxNoOfPodsToEvictPerNode = nil
|
||||
}
|
||||
if in.MaxNoOfPodsToEvictPerNamespace != nil {
|
||||
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
|
||||
*out = new(uint)
|
||||
**out = uint(**in)
|
||||
} else {
|
||||
out.MaxNoOfPodsToEvictPerNamespace = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -161,25 +110,6 @@ func Convert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *Descheduler
|
||||
|
||||
func autoConvert_api_DeschedulerPolicy_To_v1alpha1_DeschedulerPolicy(in *api.DeschedulerPolicy, out *DeschedulerPolicy, s conversion.Scope) error {
|
||||
out.Strategies = *(*StrategyList)(unsafe.Pointer(&in.Strategies))
|
||||
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
|
||||
out.EvictFailedBarePods = (*bool)(unsafe.Pointer(in.EvictFailedBarePods))
|
||||
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
|
||||
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
|
||||
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
|
||||
if in.MaxNoOfPodsToEvictPerNode != nil {
|
||||
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
|
||||
*out = new(int)
|
||||
**out = int(**in)
|
||||
} else {
|
||||
out.MaxNoOfPodsToEvictPerNode = nil
|
||||
}
|
||||
if in.MaxNoOfPodsToEvictPerNamespace != nil {
|
||||
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
|
||||
*out = new(int)
|
||||
**out = int(**in)
|
||||
} else {
|
||||
out.MaxNoOfPodsToEvictPerNamespace = nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -191,7 +121,9 @@ func Convert_api_DeschedulerPolicy_To_v1alpha1_DeschedulerPolicy(in *api.Desched
|
||||
func autoConvert_v1alpha1_DeschedulerStrategy_To_api_DeschedulerStrategy(in *DeschedulerStrategy, out *api.DeschedulerStrategy, s conversion.Scope) error {
|
||||
out.Enabled = in.Enabled
|
||||
out.Weight = in.Weight
|
||||
out.Params = (*api.StrategyParameters)(unsafe.Pointer(in.Params))
|
||||
if err := Convert_v1alpha1_StrategyParameters_To_api_StrategyParameters(&in.Params, &out.Params, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -203,7 +135,9 @@ func Convert_v1alpha1_DeschedulerStrategy_To_api_DeschedulerStrategy(in *Desched
|
||||
func autoConvert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in *api.DeschedulerStrategy, out *DeschedulerStrategy, s conversion.Scope) error {
|
||||
out.Enabled = in.Enabled
|
||||
out.Weight = in.Weight
|
||||
out.Params = (*StrategyParameters)(unsafe.Pointer(in.Params))
|
||||
if err := Convert_api_StrategyParameters_To_v1alpha1_StrategyParameters(&in.Params, &out.Params, s); err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -212,56 +146,7 @@ func Convert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in *api.Des
|
||||
return autoConvert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
|
||||
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
|
||||
out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
|
||||
out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
|
||||
out.IncludingInitContainers = in.IncludingInitContainers
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1alpha1_FailedPods_To_api_FailedPods is an autogenerated conversion function.
|
||||
func Convert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
|
||||
return autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
|
||||
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
|
||||
out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
|
||||
out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
|
||||
out.IncludingInitContainers = in.IncludingInitContainers
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_api_FailedPods_To_v1alpha1_FailedPods is an autogenerated conversion function.
|
||||
func Convert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
|
||||
return autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
|
||||
out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
|
||||
out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1alpha1_Namespaces_To_api_Namespaces is an autogenerated conversion function.
|
||||
func Convert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
|
||||
return autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error {
|
||||
out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
|
||||
out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_api_Namespaces_To_v1alpha1_Namespaces is an autogenerated conversion function.
|
||||
func Convert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error {
|
||||
return autoConvert_api_Namespaces_To_v1alpha1_Namespaces(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(in *NodeResourceUtilizationThresholds, out *api.NodeResourceUtilizationThresholds, s conversion.Scope) error {
|
||||
out.UseDeviationThresholds = in.UseDeviationThresholds
|
||||
out.Thresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.Thresholds))
|
||||
out.TargetThresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.TargetThresholds))
|
||||
out.NumberOfNodes = in.NumberOfNodes
|
||||
@@ -274,7 +159,6 @@ func Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtili
|
||||
}
|
||||
|
||||
func autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in *api.NodeResourceUtilizationThresholds, out *NodeResourceUtilizationThresholds, s conversion.Scope) error {
|
||||
out.UseDeviationThresholds = in.UseDeviationThresholds
|
||||
out.Thresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.Thresholds))
|
||||
out.TargetThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.TargetThresholds))
|
||||
out.NumberOfNodes = in.NumberOfNodes
|
||||
@@ -286,28 +170,6 @@ func Convert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtili
|
||||
return autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in *PodLifeTime, out *api.PodLifeTime, s conversion.Scope) error {
|
||||
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
|
||||
out.PodStatusPhases = *(*[]string)(unsafe.Pointer(&in.PodStatusPhases))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime is an autogenerated conversion function.
|
||||
func Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in *PodLifeTime, out *api.PodLifeTime, s conversion.Scope) error {
|
||||
return autoConvert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in *api.PodLifeTime, out *PodLifeTime, s conversion.Scope) error {
|
||||
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
|
||||
out.PodStatusPhases = *(*[]string)(unsafe.Pointer(&in.PodStatusPhases))
|
||||
return nil
|
||||
}
|
||||
|
||||
// Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime is an autogenerated conversion function.
|
||||
func Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in *api.PodLifeTime, out *PodLifeTime, s conversion.Scope) error {
|
||||
return autoConvert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in, out, s)
|
||||
}
|
||||
|
||||
func autoConvert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in *PodsHavingTooManyRestarts, out *api.PodsHavingTooManyRestarts, s conversion.Scope) error {
|
||||
out.PodRestartThreshold = in.PodRestartThreshold
|
||||
out.IncludingInitContainers = in.IncludingInitContainers
|
||||
@@ -354,17 +216,8 @@ func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *Strat
|
||||
out.NodeResourceUtilizationThresholds = (*api.NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
|
||||
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
|
||||
out.PodsHavingTooManyRestarts = (*api.PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
|
||||
out.PodLifeTime = (*api.PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
|
||||
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
|
||||
out.RemoveDuplicates = (*api.RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
|
||||
out.FailedPods = (*api.FailedPods)(unsafe.Pointer(in.FailedPods))
|
||||
out.IncludeSoftConstraints = in.IncludeSoftConstraints
|
||||
out.Namespaces = (*api.Namespaces)(unsafe.Pointer(in.Namespaces))
|
||||
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
|
||||
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
|
||||
out.LabelSelector = (*v1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
|
||||
out.NodeFit = in.NodeFit
|
||||
out.IncludePreferNoSchedule = in.IncludePreferNoSchedule
|
||||
out.ExcludedTaints = *(*[]string)(unsafe.Pointer(&in.ExcludedTaints))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -377,17 +230,8 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
|
||||
out.NodeResourceUtilizationThresholds = (*NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
|
||||
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
|
||||
out.PodsHavingTooManyRestarts = (*PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
|
||||
out.PodLifeTime = (*PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
|
||||
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
|
||||
out.RemoveDuplicates = (*RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
|
||||
out.FailedPods = (*FailedPods)(unsafe.Pointer(in.FailedPods))
|
||||
out.IncludeSoftConstraints = in.IncludeSoftConstraints
|
||||
out.Namespaces = (*Namespaces)(unsafe.Pointer(in.Namespaces))
|
||||
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
|
||||
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
|
||||
out.LabelSelector = (*v1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
|
||||
out.NodeFit = in.NodeFit
|
||||
out.IncludePreferNoSchedule = in.IncludePreferNoSchedule
|
||||
out.ExcludedTaints = *(*[]string)(unsafe.Pointer(&in.ExcludedTaints))
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -22,7 +21,6 @@ limitations under the License.
|
||||
package v1alpha1
|
||||
|
||||
import (
|
||||
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
runtime "k8s.io/apimachinery/pkg/runtime"
|
||||
)
|
||||
|
||||
@@ -37,41 +35,6 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
|
||||
(*out)[key] = *val.DeepCopy()
|
||||
}
|
||||
}
|
||||
if in.NodeSelector != nil {
|
||||
in, out := &in.NodeSelector, &out.NodeSelector
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
if in.EvictFailedBarePods != nil {
|
||||
in, out := &in.EvictFailedBarePods, &out.EvictFailedBarePods
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.EvictLocalStoragePods != nil {
|
||||
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.EvictSystemCriticalPods != nil {
|
||||
in, out := &in.EvictSystemCriticalPods, &out.EvictSystemCriticalPods
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.IgnorePVCPods != nil {
|
||||
in, out := &in.IgnorePVCPods, &out.IgnorePVCPods
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.MaxNoOfPodsToEvictPerNode != nil {
|
||||
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
|
||||
*out = new(int)
|
||||
**out = **in
|
||||
}
|
||||
if in.MaxNoOfPodsToEvictPerNamespace != nil {
|
||||
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
|
||||
*out = new(int)
|
||||
**out = **in
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
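DeepCopyInto above allocates fresh storage for every non-nil pointer field so the copy can be mutated without touching the original. A self-contained sketch of the same pattern on a single optional field (the type here is a stand-in, not the real DeschedulerPolicy):

```go
package main

import "fmt"

// Stand-in type with one optional field, mirroring how deepcopy-gen handles pointers.
type policy struct{ EvictLocalStoragePods *bool }

// DeepCopy allocates new storage for the pointer field and copies the pointee.
func (in *policy) DeepCopy() *policy {
	out := new(policy)
	if in.EvictLocalStoragePods != nil {
		v := *in.EvictLocalStoragePods
		out.EvictLocalStoragePods = &v
	}
	return out
}

func main() {
	t := true
	orig := &policy{EvictLocalStoragePods: &t}
	cp := orig.DeepCopy()
	*cp.EvictLocalStoragePods = false
	fmt.Println(*orig.EvictLocalStoragePods, *cp.EvictLocalStoragePods) // true false
}
```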
@@ -96,11 +59,7 @@ func (in *DeschedulerPolicy) DeepCopyObject() runtime.Object {
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *DeschedulerStrategy) DeepCopyInto(out *DeschedulerStrategy) {
|
||||
*out = *in
|
||||
if in.Params != nil {
|
||||
in, out := &in.Params, &out.Params
|
||||
*out = new(StrategyParameters)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
in.Params.DeepCopyInto(&out.Params)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -114,63 +73,6 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *FailedPods) DeepCopyInto(out *FailedPods) {
|
||||
*out = *in
|
||||
if in.ExcludeOwnerKinds != nil {
|
||||
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.MinPodLifetimeSeconds != nil {
|
||||
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
|
||||
*out = new(uint)
|
||||
**out = **in
|
||||
}
|
||||
if in.Reasons != nil {
|
||||
in, out := &in.Reasons, &out.Reasons
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
|
||||
func (in *FailedPods) DeepCopy() *FailedPods {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(FailedPods)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *Namespaces) DeepCopyInto(out *Namespaces) {
|
||||
*out = *in
|
||||
if in.Include != nil {
|
||||
in, out := &in.Include, &out.Include
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.Exclude != nil {
|
||||
in, out := &in.Exclude, &out.Exclude
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces.
|
||||
func (in *Namespaces) DeepCopy() *Namespaces {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(Namespaces)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *NodeResourceUtilizationThresholds) DeepCopyInto(out *NodeResourceUtilizationThresholds) {
|
||||
*out = *in
|
||||
@@ -201,32 +103,6 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) {
|
||||
*out = *in
|
||||
if in.MaxPodLifeTimeSeconds != nil {
|
||||
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
|
||||
*out = new(uint)
|
||||
**out = **in
|
||||
}
|
||||
if in.PodStatusPhases != nil {
|
||||
in, out := &in.PodStatusPhases, &out.PodStatusPhases
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTime.
|
||||
func (in *PodLifeTime) DeepCopy() *PodLifeTime {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(PodLifeTime)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
|
||||
*out = *in
|
||||
@@ -326,41 +202,16 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
|
||||
*out = new(PodsHavingTooManyRestarts)
|
||||
**out = **in
|
||||
}
|
||||
if in.PodLifeTime != nil {
|
||||
in, out := &in.PodLifeTime, &out.PodLifeTime
|
||||
*out = new(PodLifeTime)
|
||||
(*in).DeepCopyInto(*out)
|
||||
if in.MaxPodLifeTimeSeconds != nil {
|
||||
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
|
||||
*out = new(uint)
|
||||
**out = **in
|
||||
}
|
||||
if in.RemoveDuplicates != nil {
|
||||
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
|
||||
*out = new(RemoveDuplicates)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.FailedPods != nil {
|
||||
in, out := &in.FailedPods, &out.FailedPods
|
||||
*out = new(FailedPods)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.Namespaces != nil {
|
||||
in, out := &in.Namespaces, &out.Namespaces
|
||||
*out = new(Namespaces)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.ThresholdPriority != nil {
|
||||
in, out := &in.ThresholdPriority, &out.ThresholdPriority
|
||||
*out = new(int32)
|
||||
**out = **in
|
||||
}
|
||||
if in.LabelSelector != nil {
|
||||
in, out := &in.LabelSelector, &out.LabelSelector
|
||||
*out = new(v1.LabelSelector)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.ExcludedTaints != nil {
|
||||
in, out := &in.ExcludedTaints, &out.ExcludedTaints
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -22,7 +21,6 @@ limitations under the License.
|
||||
package api
|
||||
|
||||
import (
|
||||
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
runtime "k8s.io/apimachinery/pkg/runtime"
|
||||
)
|
||||
|
||||
@@ -37,41 +35,6 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
|
||||
(*out)[key] = *val.DeepCopy()
|
||||
}
|
||||
}
|
||||
if in.NodeSelector != nil {
|
||||
in, out := &in.NodeSelector, &out.NodeSelector
|
||||
*out = new(string)
|
||||
**out = **in
|
||||
}
|
||||
if in.EvictFailedBarePods != nil {
|
||||
in, out := &in.EvictFailedBarePods, &out.EvictFailedBarePods
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.EvictLocalStoragePods != nil {
|
||||
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.EvictSystemCriticalPods != nil {
|
||||
in, out := &in.EvictSystemCriticalPods, &out.EvictSystemCriticalPods
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.IgnorePVCPods != nil {
|
||||
in, out := &in.IgnorePVCPods, &out.IgnorePVCPods
|
||||
*out = new(bool)
|
||||
**out = **in
|
||||
}
|
||||
if in.MaxNoOfPodsToEvictPerNode != nil {
|
||||
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
|
||||
*out = new(uint)
|
||||
**out = **in
|
||||
}
|
||||
if in.MaxNoOfPodsToEvictPerNamespace != nil {
|
||||
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
|
||||
*out = new(uint)
|
||||
**out = **in
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
@@ -96,11 +59,7 @@ func (in *DeschedulerPolicy) DeepCopyObject() runtime.Object {
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *DeschedulerStrategy) DeepCopyInto(out *DeschedulerStrategy) {
|
||||
*out = *in
|
||||
if in.Params != nil {
|
||||
in, out := &in.Params, &out.Params
|
||||
*out = new(StrategyParameters)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
in.Params.DeepCopyInto(&out.Params)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -114,63 +73,6 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *FailedPods) DeepCopyInto(out *FailedPods) {
|
||||
*out = *in
|
||||
if in.ExcludeOwnerKinds != nil {
|
||||
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.MinPodLifetimeSeconds != nil {
|
||||
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
|
||||
*out = new(uint)
|
||||
**out = **in
|
||||
}
|
||||
if in.Reasons != nil {
|
||||
in, out := &in.Reasons, &out.Reasons
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
|
||||
func (in *FailedPods) DeepCopy() *FailedPods {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(FailedPods)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *Namespaces) DeepCopyInto(out *Namespaces) {
|
||||
*out = *in
|
||||
if in.Include != nil {
|
||||
in, out := &in.Include, &out.Include
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
if in.Exclude != nil {
|
||||
in, out := &in.Exclude, &out.Exclude
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces.
|
||||
func (in *Namespaces) DeepCopy() *Namespaces {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(Namespaces)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *NodeResourceUtilizationThresholds) DeepCopyInto(out *NodeResourceUtilizationThresholds) {
|
||||
*out = *in
|
||||
@@ -201,32 +103,6 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) {
|
||||
*out = *in
|
||||
if in.MaxPodLifeTimeSeconds != nil {
|
||||
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
|
||||
*out = new(uint)
|
||||
**out = **in
|
||||
}
|
||||
if in.PodStatusPhases != nil {
|
||||
in, out := &in.PodStatusPhases, &out.PodStatusPhases
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTime.
|
||||
func (in *PodLifeTime) DeepCopy() *PodLifeTime {
|
||||
if in == nil {
|
||||
return nil
|
||||
}
|
||||
out := new(PodLifeTime)
|
||||
in.DeepCopyInto(out)
|
||||
return out
|
||||
}
|
||||
|
||||
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
|
||||
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
|
||||
*out = *in
|
||||
@@ -326,41 +202,16 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
|
||||
*out = new(PodsHavingTooManyRestarts)
|
||||
**out = **in
|
||||
}
|
||||
if in.PodLifeTime != nil {
|
||||
in, out := &in.PodLifeTime, &out.PodLifeTime
|
||||
*out = new(PodLifeTime)
|
||||
(*in).DeepCopyInto(*out)
|
||||
if in.MaxPodLifeTimeSeconds != nil {
|
||||
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
|
||||
*out = new(uint)
|
||||
**out = **in
|
||||
}
|
||||
if in.RemoveDuplicates != nil {
|
||||
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
|
||||
*out = new(RemoveDuplicates)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.FailedPods != nil {
|
||||
in, out := &in.FailedPods, &out.FailedPods
|
||||
*out = new(FailedPods)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.Namespaces != nil {
|
||||
in, out := &in.Namespaces, &out.Namespaces
|
||||
*out = new(Namespaces)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.ThresholdPriority != nil {
|
||||
in, out := &in.ThresholdPriority, &out.ThresholdPriority
|
||||
*out = new(int32)
|
||||
**out = **in
|
||||
}
|
||||
if in.LabelSelector != nil {
|
||||
in, out := &in.LabelSelector, &out.LabelSelector
|
||||
*out = new(v1.LabelSelector)
|
||||
(*in).DeepCopyInto(*out)
|
||||
}
|
||||
if in.ExcludedTaints != nil {
|
||||
in, out := &in.ExcludedTaints, &out.ExcludedTaints
|
||||
*out = make([]string, len(*in))
|
||||
copy(*out, *in)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -19,6 +19,8 @@ package componentconfig
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/scheme"
|
||||
)
|
||||
|
||||
var (
|
||||
@@ -32,6 +34,12 @@ const GroupName = "deschedulercomponentconfig"
|
||||
// SchemeGroupVersion is group version used to register these objects
|
||||
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
|
||||
|
||||
func init() {
|
||||
if err := addKnownTypes(scheme.Scheme); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
// Kind takes an unqualified kind and returns a Group qualified GroupKind
|
||||
func Kind(kind string) schema.GroupKind {
|
||||
return SchemeGroupVersion.WithKind(kind).GroupKind()
|
||||
|
||||
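The registration code above pairs the component config group name with runtime.APIVersionInternal and exposes a Kind helper. A small stand-alone sketch of how that helper resolves an unqualified kind against the group version (constants mirrored here for illustration):

```go
package main

import (
	"fmt"

	"k8s.io/apimachinery/pkg/runtime"
	"k8s.io/apimachinery/pkg/runtime/schema"
)

// Mirrors the internal component config group version declared above.
const groupName = "deschedulercomponentconfig"

var groupVersion = schema.GroupVersion{Group: groupName, Version: runtime.APIVersionInternal}

// kind qualifies an unqualified kind with the group, as the Kind helper above does.
func kind(k string) schema.GroupKind {
	return groupVersion.WithKind(k).GroupKind()
}

func main() {
	fmt.Println(kind("DeschedulerConfiguration")) // DeschedulerConfiguration.deschedulercomponentconfig
}
```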
@@ -20,7 +20,6 @@ import (
|
||||
"time"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
componentbaseconfig "k8s.io/component-base/config"
|
||||
)
|
||||
|
||||
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
|
||||
@@ -49,14 +48,4 @@ type DeschedulerConfiguration struct {
|
||||
|
||||
// EvictLocalStoragePods allows pods using local storage to be evicted.
|
||||
EvictLocalStoragePods bool
|
||||
|
||||
// IgnorePVCPods sets whether PVC pods should be allowed to be evicted
|
||||
IgnorePVCPods bool
|
||||
|
||||
// LeaderElection starts Deployment using leader election loop
|
||||
LeaderElection componentbaseconfig.LeaderElectionConfiguration
|
||||
|
||||
// Logging specifies the options of logging.
|
||||
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information.
|
||||
Logging componentbaseconfig.LoggingConfiguration
|
||||
}
|
||||
|
||||
@@ -19,8 +19,6 @@ package v1alpha1
|
||||
import (
|
||||
"time"
|
||||
|
||||
componentbaseconfig "k8s.io/component-base/config"
|
||||
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
)
|
||||
|
||||
@@ -50,14 +48,4 @@ type DeschedulerConfiguration struct {
|
||||
|
||||
// EvictLocalStoragePods allows pods using local storage to be evicted.
|
||||
EvictLocalStoragePods bool `json:"evictLocalStoragePods,omitempty"`
|
||||
|
||||
// IgnorePVCPods sets whether PVC pods should be allowed to be evicted
|
||||
IgnorePVCPods bool `json:"ignorePvcPods,omitempty"`
|
||||
|
||||
// LeaderElection starts Deployment using leader election loop
|
||||
LeaderElection componentbaseconfig.LeaderElectionConfiguration `json:"leaderElection,omitempty"`
|
||||
|
||||
// Logging specifies the options of logging.
|
||||
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information.
|
||||
Logging componentbaseconfig.LoggingConfiguration `json:"logging,omitempty"`
|
||||
}
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -57,9 +56,6 @@ func autoConvert_v1alpha1_DeschedulerConfiguration_To_componentconfig_Deschedule
|
||||
out.NodeSelector = in.NodeSelector
|
||||
out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode
|
||||
out.EvictLocalStoragePods = in.EvictLocalStoragePods
|
||||
out.IgnorePVCPods = in.IgnorePVCPods
|
||||
out.LeaderElection = in.LeaderElection
|
||||
out.Logging = in.Logging
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -76,9 +72,6 @@ func autoConvert_componentconfig_DeschedulerConfiguration_To_v1alpha1_Deschedule
|
||||
out.NodeSelector = in.NodeSelector
|
||||
out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode
|
||||
out.EvictLocalStoragePods = in.EvictLocalStoragePods
|
||||
out.IgnorePVCPods = in.IgnorePVCPods
|
||||
out.LeaderElection = in.LeaderElection
|
||||
out.Logging = in.Logging
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -29,8 +28,6 @@ import (
|
||||
func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
out.LeaderElection = in.LeaderElection
|
||||
in.Logging.DeepCopyInto(&out.Logging)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
|
||||
@@ -1,8 +1,7 @@
|
||||
//go:build !ignore_autogenerated
|
||||
// +build !ignore_autogenerated
|
||||
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
Copyright 2020 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
@@ -29,8 +28,6 @@ import (
|
||||
func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) {
|
||||
*out = *in
|
||||
out.TypeMeta = in.TypeMeta
|
||||
out.LeaderElection = in.LeaderElection
|
||||
in.Logging.DeepCopyInto(&out.Logging)
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
@@ -20,36 +20,23 @@ import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policy "k8s.io/api/policy/v1beta1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/api/core/v1"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog"
|
||||
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
"k8s.io/client-go/informers"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
fakeclientset "k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
corev1informers "k8s.io/client-go/informers/core/v1"
|
||||
schedulingv1informers "k8s.io/client-go/informers/scheduling/v1"
|
||||
|
||||
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
|
||||
"sigs.k8s.io/descheduler/metrics"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/client"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
|
||||
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/nodeutilization"
|
||||
)
|
||||
|
||||
func Run(ctx context.Context, rs *options.DeschedulerServer) error {
|
||||
metrics.Register()
|
||||
|
||||
func Run(rs *options.DeschedulerServer) error {
|
||||
ctx := context.Background()
|
||||
rsclient, err := client.CreateClient(rs.KubeconfigFile)
|
||||
if err != nil {
|
||||
return err
|
||||
@@ -69,245 +56,62 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error {
|
||||
return err
|
||||
}
|
||||
|
||||
runFn := func() error {
|
||||
return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion)
|
||||
}
|
||||
|
||||
if rs.LeaderElection.LeaderElect && rs.DeschedulingInterval.Seconds() == 0 {
|
||||
return fmt.Errorf("leaderElection must be used with deschedulingInterval")
|
||||
}
|
||||
|
||||
if rs.LeaderElection.LeaderElect && !rs.DryRun {
|
||||
if err := NewLeaderElection(runFn, rsclient, &rs.LeaderElection, ctx); err != nil {
|
||||
return fmt.Errorf("leaderElection: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return runFn()
|
||||
stopChannel := make(chan struct{})
|
||||
return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion, stopChannel)
|
||||
}
|
||||
|
||||
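The newer Run above refuses to start leader election without a descheduling interval, since a one-shot run has nothing to coordinate. A minimal sketch of that guard, with illustrative parameter names rather than the real DeschedulerServer fields:

```go
package main

import (
	"fmt"
	"time"
)

// validateRunMode reproduces the start-up check: leader election requires a looping run.
func validateRunMode(leaderElect bool, deschedulingInterval time.Duration) error {
	if leaderElect && deschedulingInterval == 0 {
		return fmt.Errorf("leaderElection must be used with deschedulingInterval")
	}
	return nil
}

func main() {
	fmt.Println(validateRunMode(true, 0))             // rejected: no interval configured
	fmt.Println(validateRunMode(true, 5*time.Minute)) // <nil>
}
```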
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc)
|
||||
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor)
|
||||
|
||||
func cachedClient(
|
||||
realClient clientset.Interface,
|
||||
podInformer corev1informers.PodInformer,
|
||||
nodeInformer corev1informers.NodeInformer,
|
||||
namespaceInformer corev1informers.NamespaceInformer,
|
||||
priorityClassInformer schedulingv1informers.PriorityClassInformer,
|
||||
) (clientset.Interface, error) {
|
||||
fakeClient := fakeclientset.NewSimpleClientset()
|
||||
// simulate a pod eviction by deleting a pod
|
||||
fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
if action.GetSubresource() == "eviction" {
|
||||
createAct, matched := action.(core.CreateActionImpl)
|
||||
if !matched {
|
||||
return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
|
||||
}
|
||||
eviction, matched := createAct.Object.(*policy.Eviction)
|
||||
if !matched {
|
||||
return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
|
||||
}
|
||||
if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
|
||||
return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
|
||||
}
|
||||
return true, nil, nil
|
||||
}
|
||||
// fallback to the default reactor
|
||||
return false, nil, nil
|
||||
})
|
||||
|
||||
klog.V(3).Infof("Pulling resources for the cached client from the cluster")
|
||||
pods, err := podInformer.Lister().List(labels.Everything())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to list pods: %v", err)
|
||||
}
|
||||
|
||||
for _, item := range pods {
|
||||
if _, err := fakeClient.CoreV1().Pods(item.Namespace).Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
|
||||
return nil, fmt.Errorf("unable to copy pod: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
nodes, err := nodeInformer.Lister().List(labels.Everything())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to list nodes: %v", err)
|
||||
}
|
||||
|
||||
for _, item := range nodes {
|
||||
if _, err := fakeClient.CoreV1().Nodes().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
|
||||
return nil, fmt.Errorf("unable to copy node: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
namespaces, err := namespaceInformer.Lister().List(labels.Everything())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to list namespaces: %v", err)
|
||||
}
|
||||
|
||||
for _, item := range namespaces {
|
||||
if _, err := fakeClient.CoreV1().Namespaces().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
|
||||
return nil, fmt.Errorf("unable to copy node: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
priorityClasses, err := priorityClassInformer.Lister().List(labels.Everything())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to list priorityclasses: %v", err)
|
||||
}
|
||||
|
||||
for _, item := range priorityClasses {
|
||||
if _, err := fakeClient.SchedulingV1().PriorityClasses().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
|
||||
return nil, fmt.Errorf("unable to copy priorityclass: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
return fakeClient, nil
|
||||
}
|
||||
|
||||
func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string) error {
|
||||
func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, stopChannel chan struct{}) error {
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(rs.Client, 0)
|
||||
nodeInformer := sharedInformerFactory.Core().V1().Nodes()
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
namespaceInformer := sharedInformerFactory.Core().V1().Namespaces()
|
||||
priorityClassInformer := sharedInformerFactory.Scheduling().V1().PriorityClasses()
|
||||
|
||||
ctx, cancel := context.WithCancel(ctx)
|
||||
defer cancel()
|
||||
sharedInformerFactory.Start(stopChannel)
|
||||
sharedInformerFactory.WaitForCacheSync(stopChannel)
|
||||
|
||||
// create the informers
|
||||
namespaceInformer.Informer()
|
||||
priorityClassInformer.Informer()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
return fmt.Errorf("build get pods assigned to node function error: %v", err)
|
||||
strategyFuncs := map[string]strategyFunction{
|
||||
"RemoveDuplicates": strategies.RemoveDuplicatePods,
|
||||
"LowNodeUtilization": strategies.LowNodeUtilization,
|
||||
"RemovePodsViolatingInterPodAntiAffinity": strategies.RemovePodsViolatingInterPodAntiAffinity,
|
||||
"RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity,
|
||||
"RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints,
|
||||
"RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts,
|
||||
"PodLifeTime": strategies.PodLifeTime,
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
strategyFuncs := map[api.StrategyName]strategyFunction{
|
||||
"RemoveDuplicates": strategies.RemoveDuplicatePods,
|
||||
"LowNodeUtilization": nodeutilization.LowNodeUtilization,
|
||||
"HighNodeUtilization": nodeutilization.HighNodeUtilization,
|
||||
"RemovePodsViolatingInterPodAntiAffinity": strategies.RemovePodsViolatingInterPodAntiAffinity,
|
||||
"RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity,
|
||||
"RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints,
|
||||
"RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts,
|
||||
"PodLifeTime": strategies.PodLifeTime,
|
||||
"RemovePodsViolatingTopologySpreadConstraint": strategies.RemovePodsViolatingTopologySpreadConstraint,
|
||||
"RemoveFailedPods": strategies.RemoveFailedPods,
|
||||
}
|
||||
|
||||
var nodeSelector string
|
||||
if deschedulerPolicy.NodeSelector != nil {
|
||||
nodeSelector = *deschedulerPolicy.NodeSelector
|
||||
}
|
||||
|
||||
var evictLocalStoragePods bool
|
||||
if deschedulerPolicy.EvictLocalStoragePods != nil {
|
||||
evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods
|
||||
}
|
||||
|
||||
evictBarePods := false
|
||||
if deschedulerPolicy.EvictFailedBarePods != nil {
|
||||
evictBarePods = *deschedulerPolicy.EvictFailedBarePods
|
||||
if evictBarePods {
|
||||
klog.V(1).InfoS("Warning: EvictFailedBarePods is set to True. This could cause eviction of pods without ownerReferences.")
|
||||
}
|
||||
}
|
||||
|
||||
evictSystemCriticalPods := false
|
||||
if deschedulerPolicy.EvictSystemCriticalPods != nil {
|
||||
evictSystemCriticalPods = *deschedulerPolicy.EvictSystemCriticalPods
|
||||
if evictSystemCriticalPods {
|
||||
klog.V(1).InfoS("Warning: EvictSystemCriticalPods is set to True. This could cause eviction of Kubernetes system pods.")
|
||||
}
|
||||
}
|
||||
|
||||
ignorePvcPods := false
|
||||
if deschedulerPolicy.IgnorePVCPods != nil {
|
||||
ignorePvcPods = *deschedulerPolicy.IgnorePVCPods
|
||||
}
|
||||
|
||||
wait.NonSlidingUntil(func() {
|
||||
nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, nodeSelector)
|
||||
wait.Until(func() {
|
||||
nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, rs.NodeSelector, stopChannel)
|
||||
if err != nil {
|
||||
klog.V(1).InfoS("Unable to get ready nodes", "err", err)
|
||||
cancel()
|
||||
klog.V(1).Infof("Unable to get ready nodes: %v", err)
|
||||
close(stopChannel)
|
||||
return
|
||||
}
|
||||
|
||||
if len(nodes) <= 1 {
|
||||
klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
|
||||
cancel()
|
||||
klog.V(1).Infof("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
|
||||
close(stopChannel)
|
||||
return
|
||||
}
|
||||
|
||||
var podEvictorClient clientset.Interface
|
||||
// When dry-run mode is enabled, collect all the relevant objects (mostly pods) under a fake client
|
||||
// so that evicting pods while running multiple strategies in a row has the same cumulative effect
|
||||
// as evicting pods for real.
|
||||
if rs.DryRun {
|
||||
klog.V(3).Infof("Building a cached client from the cluster for the dry run")
|
||||
// Create a new cache so we start from scratch without any leftovers
|
||||
fakeClient, err := cachedClient(rs.Client, podInformer, nodeInformer, namespaceInformer, priorityClassInformer)
|
||||
if err != nil {
|
||||
klog.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods())
|
||||
if err != nil {
|
||||
klog.Errorf("build get pods assigned to node function error: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
fakeCtx, cncl := context.WithCancel(context.TODO())
|
||||
defer cncl()
|
||||
fakeSharedInformerFactory.Start(fakeCtx.Done())
|
||||
fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())
|
||||
|
||||
podEvictorClient = fakeClient
|
||||
} else {
|
||||
podEvictorClient = rs.Client
|
||||
}
|
||||
|
||||
klog.V(3).Infof("Building a pod evictor")
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
podEvictorClient,
|
||||
rs.Client,
|
||||
evictionPolicyGroupVersion,
|
||||
rs.DryRun,
|
||||
deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
|
||||
deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
|
||||
rs.MaxNoOfPodsToEvictPerNode,
|
||||
nodes,
|
||||
getPodsAssignedToNode,
|
||||
evictLocalStoragePods,
|
||||
evictSystemCriticalPods,
|
||||
ignorePvcPods,
|
||||
evictBarePods,
|
||||
!rs.DisableMetrics,
|
||||
)
|
||||
|
||||
for name, strategy := range deschedulerPolicy.Strategies {
|
||||
if f, ok := strategyFuncs[name]; ok {
|
||||
if strategy.Enabled {
|
||||
f(ctx, rs.Client, strategy, nodes, podEvictor, getPodsAssignedToNode)
|
||||
}
|
||||
} else {
|
||||
klog.ErrorS(fmt.Errorf("unknown strategy name"), "skipping strategy", "strategy", name)
|
||||
for name, f := range strategyFuncs {
|
||||
if strategy := deschedulerPolicy.Strategies[api.StrategyName(name)]; strategy.Enabled {
|
||||
f(ctx, rs.Client, strategy, nodes, rs.EvictLocalStoragePods, podEvictor)
|
||||
}
|
||||
}
|
||||
|
||||
klog.V(1).InfoS("Number of evicted pods", "totalEvicted", podEvictor.TotalEvicted())
|
||||
|
||||
// If there was no interval specified, send a signal to the stopChannel to end the wait.Until loop after 1 iteration
|
||||
if rs.DeschedulingInterval.Seconds() == 0 {
|
||||
cancel()
|
||||
close(stopChannel)
|
||||
}
|
||||
}, rs.DeschedulingInterval, ctx.Done())
|
||||
}, rs.DeschedulingInterval, stopChannel)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -7,7 +7,7 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/wait"
|
||||
fakeclientset "k8s.io/client-go/kubernetes/fake"
|
||||
@@ -35,27 +35,21 @@ func TestTaintsUpdated(t *testing.T) {
|
||||
},
|
||||
}
|
||||
|
||||
rs, err := options.NewDeschedulerServer()
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to initialize server: %v", err)
|
||||
}
|
||||
stopChannel := make(chan struct{})
|
||||
defer close(stopChannel)
|
||||
|
||||
rs := options.NewDeschedulerServer()
|
||||
rs.Client = client
|
||||
rs.DeschedulingInterval = 100 * time.Millisecond
|
||||
errChan := make(chan error, 1)
|
||||
defer close(errChan)
|
||||
go func() {
|
||||
err := RunDeschedulerStrategies(ctx, rs, dp, "v1beta1")
|
||||
errChan <- err
|
||||
}()
|
||||
select {
|
||||
case err := <-errChan:
|
||||
err := RunDeschedulerStrategies(ctx, rs, dp, "v1beta1", stopChannel)
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to run descheduler strategies: %v", err)
|
||||
}
|
||||
case <-time.After(1 * time.Second):
|
||||
// Wait for few cycles and then verify the only pod still exists
|
||||
}
|
||||
}()
|
||||
|
||||
// Wait for few cycles and then verify the only pod still exists
|
||||
time.Sleep(300 * time.Millisecond)
|
||||
pods, err := client.CoreV1().Pods(p1.Namespace).List(ctx, metav1.ListOptions{})
|
||||
if err != nil {
|
||||
t.Errorf("Unable to list pods: %v", err)
|
||||
@@ -98,69 +92,3 @@ func TestTaintsUpdated(t *testing.T) {
|
||||
t.Fatalf("Unable to evict pod, node taint did not get propagated to descheduler strategies")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRootCancel(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
|
||||
n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
|
||||
client := fakeclientset.NewSimpleClientset(n1, n2)
|
||||
dp := &api.DeschedulerPolicy{
|
||||
Strategies: api.StrategyList{}, // no strategies needed for this test
|
||||
}
|
||||
|
||||
rs, err := options.NewDeschedulerServer()
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to initialize server: %v", err)
|
||||
}
|
||||
rs.Client = client
|
||||
rs.DeschedulingInterval = 100 * time.Millisecond
|
||||
errChan := make(chan error, 1)
|
||||
defer close(errChan)
|
||||
|
||||
go func() {
|
||||
err := RunDeschedulerStrategies(ctx, rs, dp, "v1beta1")
|
||||
errChan <- err
|
||||
}()
|
||||
cancel()
|
||||
select {
|
||||
case err := <-errChan:
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to run descheduler strategies: %v", err)
|
||||
}
|
||||
case <-time.After(1 * time.Second):
|
||||
t.Fatal("Root ctx should have canceled immediately")
|
||||
}
|
||||
}
|
||||
|
||||
func TestRootCancelWithNoInterval(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
|
||||
n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
|
||||
client := fakeclientset.NewSimpleClientset(n1, n2)
|
||||
dp := &api.DeschedulerPolicy{
|
||||
Strategies: api.StrategyList{}, // no strategies needed for this test
|
||||
}
|
||||
|
||||
rs, err := options.NewDeschedulerServer()
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to initialize server: %v", err)
|
||||
}
|
||||
rs.Client = client
|
||||
rs.DeschedulingInterval = 0
|
||||
errChan := make(chan error, 1)
|
||||
defer close(errChan)
|
||||
|
||||
go func() {
|
||||
err := RunDeschedulerStrategies(ctx, rs, dp, "v1beta1")
|
||||
errChan <- err
|
||||
}()
|
||||
cancel()
|
||||
select {
|
||||
case err := <-errChan:
|
||||
if err != nil {
|
||||
t.Fatalf("Unable to run descheduler strategies: %v", err)
|
||||
}
|
||||
case <-time.After(1 * time.Second):
|
||||
t.Fatal("Root ctx should have canceled immediately")
|
||||
}
|
||||
}
|
||||
|
||||
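The tests above follow a common pattern for exercising a blocking entry point: run it in a goroutine, cancel the root context, and require a result on an error channel before a timeout. A self-contained sketch of that pattern with a trivial run function standing in for RunDeschedulerStrategies:

```go
package main

import (
	"context"
	"fmt"
	"time"
)

// run blocks until its context is canceled, standing in for the real entry point.
func run(ctx context.Context) error {
	<-ctx.Done()
	return nil
}

func main() {
	ctx, cancel := context.WithCancel(context.Background())
	errChan := make(chan error, 1)
	go func() { errChan <- run(ctx) }()

	cancel()
	select {
	case err := <-errChan:
		fmt.Println("returned:", err)
	case <-time.After(time.Second):
		fmt.Println("run should have returned immediately after cancel")
	}
}
```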
@@ -19,99 +19,61 @@ package evictions
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policy "k8s.io/api/policy/v1beta1"
|
||||
apierrors "k8s.io/apimachinery/pkg/api/errors"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/util/errors"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/kubernetes/scheme"
|
||||
clientcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
|
||||
"k8s.io/client-go/tools/record"
|
||||
"k8s.io/klog/v2"
|
||||
"sigs.k8s.io/descheduler/metrics"
|
||||
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
"k8s.io/klog"
|
||||
|
||||
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
|
||||
)
|
||||
|
||||
const (
|
||||
evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict"
|
||||
)
|
||||
|
||||
// nodePodEvictedCount keeps count of pods evicted on node
|
||||
type nodePodEvictedCount map[*v1.Node]uint
|
||||
type namespacePodEvictCount map[string]uint
|
||||
type nodePodEvictedCount map[*v1.Node]int
|
||||
|
||||
type PodEvictor struct {
|
||||
client clientset.Interface
|
||||
nodes []*v1.Node
|
||||
nodeIndexer podutil.GetPodsAssignedToNodeFunc
|
||||
policyGroupVersion string
|
||||
dryRun bool
|
||||
maxPodsToEvictPerNode *uint
|
||||
maxPodsToEvictPerNamespace *uint
|
||||
nodepodCount nodePodEvictedCount
|
||||
namespacePodCount namespacePodEvictCount
|
||||
evictFailedBarePods bool
|
||||
evictLocalStoragePods bool
|
||||
evictSystemCriticalPods bool
|
||||
ignorePvcPods bool
|
||||
metricsEnabled bool
|
||||
client clientset.Interface
|
||||
policyGroupVersion string
|
||||
dryRun bool
|
||||
maxPodsToEvict int
|
||||
nodepodCount nodePodEvictedCount
|
||||
}
|
||||
|
||||
func NewPodEvictor(
|
||||
client clientset.Interface,
|
||||
policyGroupVersion string,
|
||||
dryRun bool,
|
||||
maxPodsToEvictPerNode *uint,
|
||||
maxPodsToEvictPerNamespace *uint,
|
||||
maxPodsToEvict int,
|
||||
nodes []*v1.Node,
|
||||
nodeIndexer podutil.GetPodsAssignedToNodeFunc,
|
||||
evictLocalStoragePods bool,
|
||||
evictSystemCriticalPods bool,
|
||||
ignorePvcPods bool,
|
||||
evictFailedBarePods bool,
|
||||
metricsEnabled bool,
|
||||
) *PodEvictor {
|
||||
var nodePodCount = make(nodePodEvictedCount)
|
||||
var namespacePodCount = make(namespacePodEvictCount)
|
||||
for _, node := range nodes {
|
||||
// Initialize the evicted-pod count for each node to 0.
|
||||
nodePodCount[node] = 0
|
||||
}
|
||||
|
||||
return &PodEvictor{
|
||||
client: client,
|
||||
nodes: nodes,
|
||||
nodeIndexer: nodeIndexer,
|
||||
policyGroupVersion: policyGroupVersion,
|
||||
dryRun: dryRun,
|
||||
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
|
||||
maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
|
||||
nodepodCount: nodePodCount,
|
||||
namespacePodCount: namespacePodCount,
|
||||
evictLocalStoragePods: evictLocalStoragePods,
|
||||
evictSystemCriticalPods: evictSystemCriticalPods,
|
||||
evictFailedBarePods: evictFailedBarePods,
|
||||
ignorePvcPods: ignorePvcPods,
|
||||
metricsEnabled: metricsEnabled,
|
||||
client: client,
|
||||
policyGroupVersion: policyGroupVersion,
|
||||
dryRun: dryRun,
|
||||
maxPodsToEvict: maxPodsToEvict,
|
||||
nodepodCount: nodePodCount,
|
||||
}
|
||||
}
|
||||
|
||||
// NodeEvicted gives a number of pods evicted for node
|
||||
func (pe *PodEvictor) NodeEvicted(node *v1.Node) uint {
|
||||
func (pe *PodEvictor) NodeEvicted(node *v1.Node) int {
|
||||
return pe.nodepodCount[node]
|
||||
}
|
||||
|
||||
// TotalEvicted gives a number of pods evicted through all nodes
|
||||
func (pe *PodEvictor) TotalEvicted() uint {
|
||||
var total uint
|
||||
func (pe *PodEvictor) TotalEvicted() int {
|
||||
var total int
|
||||
for _, count := range pe.nodepodCount {
|
||||
total += count
|
||||
}
|
||||
@@ -119,58 +81,33 @@ func (pe *PodEvictor) TotalEvicted() uint {
|
||||
}
|
||||
|
||||
// EvictPod returns non-nil error only when evicting a pod on a node is not
|
||||
// possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod
|
||||
// possible (due to maxPodsToEvict constraint). Success is true when the pod
|
||||
// is evicted on the server side.
|
||||
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node, strategy string, reasons ...string) (bool, error) {
|
||||
reason := strategy
|
||||
if len(reasons) > 0 {
|
||||
reason += " (" + strings.Join(reasons, ", ") + ")"
|
||||
}
|
||||
if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[node]+1 > *pe.maxPodsToEvictPerNode {
|
||||
if pe.metricsEnabled {
|
||||
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
|
||||
}
|
||||
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, node.Name)
|
||||
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) (bool, error) {
|
||||
if pe.maxPodsToEvict > 0 && pe.nodepodCount[node]+1 > pe.maxPodsToEvict {
|
||||
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", pe.maxPodsToEvict, node.Name)
|
||||
}
|
||||
|
||||
if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace {
|
||||
if pe.metricsEnabled {
|
||||
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
|
||||
}
|
||||
return false, fmt.Errorf("Maximum number %v of evicted pods per %q namespace reached", *pe.maxPodsToEvictPerNamespace, pod.Namespace)
|
||||
}
|
||||
|
||||
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
|
||||
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion, pe.dryRun)
|
||||
if err != nil {
|
||||
// err is used only for logging purposes
|
||||
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", reason)
|
||||
if pe.metricsEnabled {
|
||||
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
|
||||
}
|
||||
klog.Errorf("Error evicting pod: %#v in namespace %#v (%#v)", pod.Name, pod.Namespace, err)
|
||||
return false, nil
|
||||
}
|
||||
|
||||
pe.nodepodCount[node]++
|
||||
pe.namespacePodCount[pod.Namespace]++
|
||||
|
||||
if pe.metricsEnabled {
|
||||
metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
|
||||
}
|
||||
|
||||
if pe.dryRun {
|
||||
klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", node.Name)
|
||||
klog.V(1).Infof("Evicted pod in dry run mode: %#v in namespace %#v", pod.Name, pod.Namespace)
|
||||
} else {
|
||||
klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", reason, "strategy", strategy, "node", node.Name)
|
||||
eventBroadcaster := record.NewBroadcaster()
|
||||
eventBroadcaster.StartStructuredLogging(3)
|
||||
eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: pe.client.CoreV1().Events(pod.Namespace)})
|
||||
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
|
||||
r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason))
|
||||
klog.V(1).Infof("Evicted pod: %#v in namespace %#v", pod.Name, pod.Namespace)
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
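The per-node and per-namespace checks in EvictPod above treat a nil limit as unlimited and otherwise refuse an eviction that would push the running count past the limit. A tiny self-contained sketch of that rule:

```go
package main

import "fmt"

// underLimit reports whether one more eviction is allowed; a nil limit means unlimited.
func underLimit(evictedSoFar uint, limit *uint) bool {
	return limit == nil || evictedSoFar+1 <= *limit
}

func main() {
	max := uint(2)
	fmt.Println(underLimit(0, &max)) // true
	fmt.Println(underLimit(2, &max)) // false: limit reached
	fmt.Println(underLimit(10, nil)) // true: unlimited
}
```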
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
|
||||
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string, dryRun bool) error {
|
||||
if dryRun {
|
||||
return nil
|
||||
}
|
||||
deleteOptions := &metav1.DeleteOptions{}
|
||||
// GracePeriodSeconds ?
|
||||
eviction := &policy.Eviction{
|
||||
@@ -186,6 +123,14 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
|
||||
}
|
||||
err := client.PolicyV1beta1().Evictions(eviction.Namespace).Evict(ctx, eviction)
|
||||
|
||||
if err == nil {
|
||||
eventBroadcaster := record.NewBroadcaster()
|
||||
eventBroadcaster.StartLogging(klog.V(3).Infof)
|
||||
eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: client.CoreV1().Events(pod.Namespace)})
|
||||
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
|
||||
r.Event(pod, v1.EventTypeNormal, "Descheduled", "pod evicted by sigs.k8s.io/descheduler")
|
||||
return nil
|
||||
}
|
||||
if apierrors.IsTooManyRequests(err) {
|
||||
return fmt.Errorf("error when evicting pod (ignoring) %q: %v", pod.Name, err)
|
||||
}
|
||||
@@ -194,171 +139,3 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
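evictPod above submits an Eviction object through the policy API instead of deleting the pod directly, so PodDisruptionBudgets are honored. A sketch of constructing that request (the helper name is illustrative; the real code builds the object inline):

```go
package main

import (
	"fmt"

	policy "k8s.io/api/policy/v1beta1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

// newEviction builds an Eviction request like the one evictPod sends;
// DeleteOptions is left at its defaults (no explicit grace period).
func newEviction(namespace, name, policyGroupVersion string) *policy.Eviction {
	return &policy.Eviction{
		TypeMeta: metav1.TypeMeta{
			APIVersion: policyGroupVersion,
			Kind:       "Eviction",
		},
		ObjectMeta: metav1.ObjectMeta{
			Namespace: namespace,
			Name:      name,
		},
		DeleteOptions: &metav1.DeleteOptions{},
	}
}

func main() {
	fmt.Println(newEviction("default", "nginx-0", "policy/v1beta1").Name)
}
```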
type Options struct {
|
||||
priority *int32
|
||||
nodeFit bool
|
||||
labelSelector labels.Selector
|
||||
}
|
||||
|
||||
// WithPriorityThreshold sets a threshold for pod's priority class.
|
||||
// Any pod whose priority class is lower is evictable.
|
||||
func WithPriorityThreshold(priority int32) func(opts *Options) {
|
||||
return func(opts *Options) {
|
||||
var p int32 = priority
|
||||
opts.priority = &p
|
||||
}
|
||||
}
|
||||
|
||||
// WithNodeFit sets whether or not to consider taints, node selectors,
|
||||
// and pod affinity when evicting. A pod whose tolerations, node selectors,
|
||||
// and affinity match a node other than the one it is currently running on
|
||||
// is evictable.
|
||||
func WithNodeFit(nodeFit bool) func(opts *Options) {
|
||||
return func(opts *Options) {
|
||||
opts.nodeFit = nodeFit
|
||||
}
|
||||
}
|
||||
|
||||
// WithLabelSelector sets whether or not to apply label filtering when evicting.
|
||||
// Any pod matching the label selector is considered evictable.
|
||||
func WithLabelSelector(labelSelector labels.Selector) func(opts *Options) {
|
||||
return func(opts *Options) {
|
||||
opts.labelSelector = labelSelector
|
||||
}
|
||||
}
|
||||
|
||||
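Evictable's options above use the functional-options pattern: each With* helper returns a closure that mutates a shared Options value before the constraints are assembled. A self-contained sketch of the pattern with simplified types:

```go
package main

import "fmt"

// Simplified options mirroring the Options struct above.
type options struct {
	priority *int32
	nodeFit  bool
}

// Each helper returns a closure that records one setting, as WithPriorityThreshold
// and WithNodeFit do.
func withPriorityThreshold(p int32) func(*options) {
	return func(o *options) { o.priority = &p }
}

func withNodeFit(nodeFit bool) func(*options) {
	return func(o *options) { o.nodeFit = nodeFit }
}

func main() {
	o := &options{}
	for _, opt := range []func(*options){withPriorityThreshold(1000), withNodeFit(true)} {
		opt(o)
	}
	fmt.Println(*o.priority, o.nodeFit) // 1000 true
}
```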
type constraint func(pod *v1.Pod) error
|
||||
|
||||
type evictable struct {
|
||||
constraints []constraint
|
||||
}
|
||||
|
||||
// Evictable provides an implementation of IsEvictable(pod *v1.Pod) bool.
|
||||
// The method accepts a list of options that extend the constraints
|
||||
// deciding when a pod is considered evictable.
|
||||
func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
|
||||
options := &Options{}
|
||||
for _, opt := range opts {
|
||||
opt(options)
|
||||
}
|
||||
|
||||
ev := &evictable{}
|
||||
if pe.evictFailedBarePods {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
ownerRefList := podutil.OwnerRef(pod)
|
||||
// Enable evictFailedBarePods to evict bare pods in failed phase
|
||||
if len(ownerRefList) == 0 && pod.Status.Phase != v1.PodFailed {
|
||||
return fmt.Errorf("pod does not have any ownerRefs and is not in failed phase")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
} else {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
ownerRefList := podutil.OwnerRef(pod)
|
||||
// Moved from IsEvictable function for backward compatibility
|
||||
if len(ownerRefList) == 0 {
|
||||
return fmt.Errorf("pod does not have any ownerRefs")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
if !pe.evictSystemCriticalPods {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
// Moved from IsEvictable function to allow for disabling
|
||||
if utils.IsCriticalPriorityPod(pod) {
|
||||
return fmt.Errorf("pod has system critical priority")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if options.priority != nil {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
if IsPodEvictableBasedOnPriority(pod, *options.priority) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("pod has higher priority than specified priority class threshold")
|
||||
})
|
||||
}
|
||||
}
|
||||
if !pe.evictLocalStoragePods {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
if utils.IsPodWithLocalStorage(pod) {
|
||||
return fmt.Errorf("pod has local storage and descheduler is not configured with evictLocalStoragePods")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
if pe.ignorePvcPods {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
if utils.IsPodWithPVC(pod) {
|
||||
return fmt.Errorf("pod has a PVC and descheduler is configured to ignore PVC pods")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
if options.nodeFit {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
if !nodeutil.PodFitsAnyOtherNode(pe.nodeIndexer, pod, pe.nodes) {
|
||||
return fmt.Errorf("pod does not fit on any other node because of nodeSelector(s), Taint(s), or nodes marked as unschedulable")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
if options.labelSelector != nil && !options.labelSelector.Empty() {
|
||||
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
|
||||
if !options.labelSelector.Matches(labels.Set(pod.Labels)) {
|
||||
return fmt.Errorf("pod labels do not match the labelSelector filter in the policy parameter")
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
return ev
|
||||
}
|
||||
|
||||
// IsEvictable decides when a pod is evictable
|
||||
func (ev *evictable) IsEvictable(pod *v1.Pod) bool {
|
||||
checkErrs := []error{}
|
||||
|
||||
ownerRefList := podutil.OwnerRef(pod)
|
||||
if utils.IsDaemonsetPod(ownerRefList) {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
|
||||
}
|
||||
|
||||
if utils.IsMirrorPod(pod) {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
|
||||
}
|
||||
|
||||
if utils.IsStaticPod(pod) {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("pod is a static pod"))
|
||||
}
|
||||
|
||||
if utils.IsPodTerminating(pod) {
|
||||
checkErrs = append(checkErrs, fmt.Errorf("pod is terminating"))
|
||||
}
|
||||
|
||||
for _, c := range ev.constraints {
|
||||
if err := c(pod); err != nil {
|
||||
checkErrs = append(checkErrs, err)
|
||||
}
|
||||
}
|
||||
|
||||
if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) {
|
||||
klog.V(4).InfoS("Pod lacks an eviction annotation and fails the following checks", "pod", klog.KObj(pod), "checks", errors.NewAggregate(checkErrs).Error())
|
||||
return false
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// HaveEvictAnnotation checks if the pod has the evict annotation
|
||||
func HaveEvictAnnotation(pod *v1.Pod) bool {
|
||||
_, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey]
|
||||
return found
|
||||
}
|
||||
|
||||
// IsPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec.
|
||||
func IsPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool {
|
||||
return pod.Spec.Priority == nil || *pod.Spec.Priority < priority
|
||||
}
|
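For orientation, here is a minimal sketch of how the option helpers and the evictability check above compose; podEvictor, pod, and the selector string are illustrative placeholders, not values taken from this diff.

// Hypothetical usage sketch (not part of the descheduler source):
selector, err := labels.Parse("app=web") // assumed label filter
if err != nil {
    return err
}
evictable := podEvictor.Evictable(
    WithNodeFit(true),
    WithPriorityThreshold(1000),
    WithLabelSelector(selector),
)
if evictable.IsEvictable(pod) {
    // the pod passed every registered constraint, or carries the
    // descheduler.alpha.kubernetes.io/evict annotation
}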
||||
|
||||
@@ -21,14 +21,9 @@ import (
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
@@ -64,651 +59,9 @@ func TestEvictPod(t *testing.T) {
|
||||
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
return true, &v1.PodList{Items: test.pods}, nil
|
||||
})
|
||||
got := evictPod(ctx, fakeClient, test.pod, "v1")
|
||||
got := evictPod(ctx, fakeClient, test.pod, "v1", false)
|
||||
if got != test.want {
|
||||
t.Errorf("Test error for Desc: %s. Expected %v pod eviction to be %v, got %v", test.description, test.pod.Name, test.want, got)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestIsEvictable(t *testing.T) {
|
||||
n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil)
|
||||
lowPriority := int32(800)
|
||||
highPriority := int32(900)
|
||||
|
||||
nodeTaintKey := "hardware"
|
||||
nodeTaintValue := "gpu"
|
||||
|
||||
nodeLabelKey := "datacenter"
|
||||
nodeLabelValue := "east"
|
||||
type testCase struct {
|
||||
description string
|
||||
pods []*v1.Pod
|
||||
nodes []*v1.Node
|
||||
evictFailedBarePods bool
|
||||
evictLocalStoragePods bool
|
||||
evictSystemCriticalPods bool
|
||||
priorityThreshold *int32
|
||||
nodeFit bool
|
||||
result bool
|
||||
}
|
||||
|
||||
testCases := []testCase{
|
||||
{
|
||||
description: "Failed pod eviction with no ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Status.Phase = v1.PodFailed
|
||||
}),
|
||||
},
|
||||
evictFailedBarePods: false,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Normal pod eviction with no ownerRefs and evictFailedBarePods enabled",
|
||||
pods: []*v1.Pod{test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil)},
|
||||
evictFailedBarePods: true,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Failed pod eviction with no ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Status.Phase = v1.PodFailed
|
||||
}),
|
||||
},
|
||||
evictFailedBarePods: true,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Normal pod eviction with normal ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Normal pod eviction with replicaSet ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p4", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Normal pod eviction with statefulSet ownerRefs",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p18", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p19", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod not evicted because it is bound to a PV and evictLocalStoragePods = false",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p5", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = true",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p6", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: true,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p7", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod not evicted becasuse it is part of a daemonSet",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p8", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p9", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod not evicted becasuse it is a mirror poddsa",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p10", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p11", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod not evicted becasuse it has system critical priority",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p12", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p13", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
pod.Annotations = map[string]string{
|
||||
"descheduler.alpha.kubernetes.io/evict": "true",
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod not evicted becasuse it has a priority higher than the configured priority threshold",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p14", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Priority = &highPriority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
priorityThreshold: &lowPriority,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p15", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.Spec.Priority = &highPriority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
priorityThreshold: &lowPriority,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p16", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Priority = &highPriority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
priorityThreshold: &lowPriority,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p17", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.Spec.Priority = &highPriority
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: true,
|
||||
priorityThreshold: &lowPriority,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod with no tolerations running on normal node, all other nodes tainted",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod with correct tolerations running on normal node, all other nodes tainted",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod with incorrect node selector",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: "fail",
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod with correct node selector",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 1000, 2000, 13, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod with correct node selector, but only available node doesn't have enough CPU",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2-TEST", 10, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3-TEST", 10, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: false,
|
||||
}, {
|
||||
description: "Pod with correct node selector, and one node has enough memory",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestPod("node2-pod-10GB-mem", 20, 10, "node2", func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.Labels = map[string]string{
|
||||
"test": "true",
|
||||
}
|
||||
}),
|
||||
test.BuildTestPod("node3-pod-10GB-mem", 20, 10, "node3", func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.Labels = map[string]string{
|
||||
"test": "true",
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 100, 20, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: true,
|
||||
}, {
|
||||
description: "Pod with correct node selector, but both nodes don't have enough memory",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 12, 8, n1.Name, func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestPod("node2-pod-10GB-mem", 10, 10, "node2", func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.Labels = map[string]string{
|
||||
"test": "true",
|
||||
}
|
||||
}),
|
||||
test.BuildTestPod("node3-pod-10GB-mem", 10, 10, "node3", func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.Labels = map[string]string{
|
||||
"test": "true",
|
||||
}
|
||||
}),
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node2", 100, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode("node3", 100, 16, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
evictSystemCriticalPods: false,
|
||||
nodeFit: true,
|
||||
result: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
|
||||
t.Run(test.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
nodes := append(test.nodes, n1)
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range test.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range test.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
podEvictor := &PodEvictor{
|
||||
client: fakeClient,
|
||||
nodes: nodes,
|
||||
nodeIndexer: getPodsAssignedToNode,
|
||||
policyGroupVersion: policyv1.SchemeGroupVersion.String(),
|
||||
dryRun: false,
|
||||
maxPodsToEvictPerNode: nil,
|
||||
maxPodsToEvictPerNamespace: nil,
|
||||
evictLocalStoragePods: test.evictLocalStoragePods,
|
||||
evictSystemCriticalPods: test.evictSystemCriticalPods,
|
||||
evictFailedBarePods: test.evictFailedBarePods,
|
||||
}
|
||||
|
||||
var opts []func(opts *Options)
|
||||
if test.priorityThreshold != nil {
|
||||
opts = append(opts, WithPriorityThreshold(*test.priorityThreshold))
|
||||
}
|
||||
if test.nodeFit {
|
||||
opts = append(opts, WithNodeFit(true))
|
||||
}
|
||||
evictable := podEvictor.Evictable(opts...)
|
||||
|
||||
result := evictable.IsEvictable(test.pods[0])
|
||||
if result != test.result {
|
||||
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pods[0].Name, test.result, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
func TestPodTypes(t *testing.T) {
|
||||
n1 := test.BuildTestNode("node1", 1000, 2000, 9, nil)
|
||||
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, nil)
|
||||
|
||||
// These won't be evicted.
|
||||
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil)
|
||||
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil)
|
||||
p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil)
|
||||
|
||||
p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
// The following pods won't get evicted.
|
||||
// A daemonset.
|
||||
//p2.Annotations = test.GetDaemonSetAnnotation()
|
||||
p2.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
// A pod with local storage.
|
||||
p3.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p3.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
p4.Annotations = test.GetMirrorPodAnnotation()
|
||||
if !utils.IsMirrorPod(p4) {
|
||||
t.Errorf("Expected p4 to be a mirror pod.")
|
||||
}
|
||||
if !utils.IsPodWithLocalStorage(p3) {
|
||||
t.Errorf("Expected p3 to be a pod with local storage.")
|
||||
}
|
||||
ownerRefList := podutil.OwnerRef(p2)
|
||||
if !utils.IsDaemonsetPod(ownerRefList) {
|
||||
t.Errorf("Expected p2 to be a daemonset pod.")
|
||||
}
|
||||
ownerRefList = podutil.OwnerRef(p1)
|
||||
if utils.IsDaemonsetPod(ownerRefList) || utils.IsPodWithLocalStorage(p1) || utils.IsCriticalPriorityPod(p1) || utils.IsMirrorPod(p1) || utils.IsStaticPod(p1) {
|
||||
t.Errorf("Expected p1 to be a normal pod.")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
/*
|
||||
Copyright 2022 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package descheduler
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"k8s.io/apimachinery/pkg/util/uuid"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/client-go/tools/leaderelection"
|
||||
"k8s.io/client-go/tools/leaderelection/resourcelock"
|
||||
componentbaseconfig "k8s.io/component-base/config"
|
||||
"k8s.io/klog/v2"
|
||||
"os"
|
||||
)
|
||||
|
||||
// NewLeaderElection starts the leader election code loop
|
||||
func NewLeaderElection(
|
||||
run func() error,
|
||||
client clientset.Interface,
|
||||
LeaderElectionConfig *componentbaseconfig.LeaderElectionConfiguration,
|
||||
ctx context.Context,
|
||||
) error {
|
||||
var id string
|
||||
|
||||
if hostname, err := os.Hostname(); err != nil {
|
||||
// on errors, make sure we're unique
|
||||
id = string(uuid.NewUUID())
|
||||
} else {
|
||||
// add a uniquifier so that two processes on the same host don't accidentally both become active
|
||||
id = hostname + "_" + string(uuid.NewUUID())
|
||||
}
|
||||
|
||||
klog.V(3).Infof("Assigned unique lease holder id: %s", id)
|
||||
|
||||
if len(LeaderElectionConfig.ResourceNamespace) == 0 {
|
||||
return fmt.Errorf("namespace may not be empty")
|
||||
}
|
||||
|
||||
if len(LeaderElectionConfig.ResourceName) == 0 {
|
||||
return fmt.Errorf("name may not be empty")
|
||||
}
|
||||
|
||||
lock, err := resourcelock.New(
|
||||
LeaderElectionConfig.ResourceLock,
|
||||
LeaderElectionConfig.ResourceNamespace,
|
||||
LeaderElectionConfig.ResourceName,
|
||||
client.CoreV1(),
|
||||
client.CoordinationV1(),
|
||||
resourcelock.ResourceLockConfig{
|
||||
Identity: id,
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unable to create leader election lock: %v", err)
|
||||
}
|
||||
|
||||
leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
|
||||
Lock: lock,
|
||||
ReleaseOnCancel: true,
|
||||
LeaseDuration: LeaderElectionConfig.LeaseDuration.Duration,
|
||||
RenewDeadline: LeaderElectionConfig.RenewDeadline.Duration,
|
||||
RetryPeriod: LeaderElectionConfig.RetryPeriod.Duration,
|
||||
Callbacks: leaderelection.LeaderCallbacks{
|
||||
OnStartedLeading: func(ctx context.Context) {
|
||||
klog.V(1).InfoS("Started leading")
|
||||
err := run()
|
||||
if err != nil {
|
||||
klog.Error(err)
|
||||
}
|
||||
},
|
||||
OnStoppedLeading: func() {
|
||||
klog.V(1).InfoS("Leader lost")
|
||||
},
|
||||
OnNewLeader: func(identity string) {
|
||||
// Just got the lock
|
||||
if identity == id {
|
||||
return
|
||||
}
|
||||
klog.V(1).Infof("New leader elected: %v", identity)
|
||||
},
|
||||
},
|
||||
})
|
||||
return nil
|
||||
}
|
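A hedged sketch of how this helper might be invoked; the config values, client, and run function below are illustrative assumptions (and would need metav1 and time imports), not descheduler defaults.

// Hypothetical invocation sketch (not part of this diff):
leConfig := &componentbaseconfig.LeaderElectionConfiguration{
    ResourceLock:      resourcelock.LeasesResourceLock,
    ResourceName:      "descheduler",
    ResourceNamespace: "kube-system",
    LeaseDuration:     metav1.Duration{Duration: 15 * time.Second},
    RenewDeadline:     metav1.Duration{Duration: 10 * time.Second},
    RetryPeriod:       metav1.Duration{Duration: 2 * time.Second},
}
if err := NewLeaderElection(runDeschedulerLoop, client, leConfig, ctx); err != nil {
    klog.ErrorS(err, "leader election failed")
}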
||||
@@ -18,22 +18,18 @@ package node
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
coreinformers "k8s.io/client-go/informers/core/v1"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"k8s.io/klog"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
)
|
||||
|
||||
// ReadyNodes returns ready nodes irrespective of whether they are
|
||||
// schedulable or not.
|
||||
func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeSelector string) ([]*v1.Node, error) {
|
||||
func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeSelector string, stopChannel <-chan struct{}) ([]*v1.Node, error) {
|
||||
ns, err := labels.Parse(nodeSelector)
|
||||
if err != nil {
|
||||
return []*v1.Node{}, err
|
||||
@@ -46,7 +42,7 @@ func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer co
|
||||
}
|
||||
|
||||
if len(nodes) == 0 {
|
||||
klog.V(2).InfoS("Node lister returned empty list, now fetch directly")
|
||||
klog.V(2).Infof("node lister returned empty list, now fetch directly")
|
||||
|
||||
nItems, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: nodeSelector})
|
||||
if err != nil {
|
||||
@@ -81,208 +77,63 @@ func IsReady(node *v1.Node) bool {
|
||||
// - NodeOutOfDisk condition status is ConditionFalse,
|
||||
// - NodeNetworkUnavailable condition status is ConditionFalse.
|
||||
if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
|
||||
klog.V(1).InfoS("Ignoring node", "node", klog.KObj(node), "condition", cond.Type, "status", cond.Status)
|
||||
klog.V(1).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
|
||||
return false
|
||||
} /*else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
|
||||
klog.V(4).InfoS("Ignoring node with condition status", "node", klog.KObj(node.Name), "condition", cond.Type, "status", cond.Status)
|
||||
klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
|
||||
return false
|
||||
} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
|
||||
klog.V(4).InfoS("Ignoring node with condition status", "node", klog.KObj(node.Name), "condition", cond.Type, "status", cond.Status)
|
||||
klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
|
||||
return false
|
||||
}*/
|
||||
}
|
||||
// Ignore nodes that are marked unschedulable
|
||||
/*if node.Spec.Unschedulable {
|
||||
klog.V(4).InfoS("Ignoring node since it is unschedulable", "node", klog.KObj(node.Name))
|
||||
klog.V(4).Infof("Ignoring node %v since it is unschedulable", node.Name)
|
||||
return false
|
||||
}*/
|
||||
return true
|
||||
}
|
||||
|
||||
// NodeFit returns true if the provided pod can be scheduled onto the provided node.
|
||||
// This function is used when the NodeFit pod filtering feature of the Descheduler is enabled.
|
||||
// This function currently considers a subset of the Kubernetes Scheduler's predicates when
|
||||
// deciding if a pod would fit on a node, but more predicates may be added in the future.
|
||||
func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) []error {
|
||||
// Check node selector and required affinity
|
||||
var errors []error
|
||||
if ok, err := utils.PodMatchNodeSelector(pod, node); err != nil {
|
||||
errors = append(errors, err)
|
||||
} else if !ok {
|
||||
errors = append(errors, fmt.Errorf("pod node selector does not match the node label"))
|
||||
}
|
||||
// Check taints (we only care about NoSchedule and NoExecute taints)
|
||||
ok := utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
|
||||
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
|
||||
})
|
||||
if !ok {
|
||||
errors = append(errors, fmt.Errorf("pod does not tolerate taints on the node"))
|
||||
}
|
||||
// Check if the pod can fit on the node based on its requests
|
||||
ok, reqErrors := fitsRequest(nodeIndexer, pod, node)
|
||||
if !ok {
|
||||
errors = append(errors, reqErrors...)
|
||||
}
|
||||
// Check if node is schedulable
|
||||
if IsNodeUnschedulable(node) {
|
||||
errors = append(errors, fmt.Errorf("node is not schedulable"))
|
||||
}
|
||||
|
||||
return errors
|
||||
}
|
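As a rough illustration (the caller-side names are assumed), NodeFit reports one error per failed predicate, so an empty slice means the pod would fit on the candidate node.

// Hypothetical sketch, not part of this diff:
if errs := NodeFit(getPodsAssignedToNode, pod, node); len(errs) == 0 {
    klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
} else {
    for _, err := range errs {
        klog.V(4).InfoS("Pod does not fit", "reason", err.Error())
    }
}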
||||
|
||||
// PodFitsAnyOtherNode checks if the given pod will fit any of the given nodes, besides the node
|
||||
// the pod is already running on. The predicates used to determine if the pod will fit can be found in the NodeFit function.
|
||||
func PodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
|
||||
for _, node := range nodes {
|
||||
// Skip node pod is already on
|
||||
if node.Name == pod.Spec.NodeName {
|
||||
continue
|
||||
}
|
||||
|
||||
errors := NodeFit(nodeIndexer, pod, node)
|
||||
if len(errors) == 0 {
|
||||
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return true
|
||||
} else {
|
||||
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
for _, err := range errors {
|
||||
klog.V(4).InfoS(err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PodFitsAnyNode checks if the given pod will fit any of the given nodes. The predicates used
|
||||
// to determine if the pod will fit can be found in the NodeFit function.
|
||||
func PodFitsAnyNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
|
||||
for _, node := range nodes {
|
||||
errors := NodeFit(nodeIndexer, pod, node)
|
||||
if len(errors) == 0 {
|
||||
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return true
|
||||
} else {
|
||||
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
for _, err := range errors {
|
||||
klog.V(4).InfoS(err.Error())
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PodFitsCurrentNode checks if the given pod will fit onto the given node. The predicates used
|
||||
// to determine if the pod will fit can be found in the NodeFit function.
|
||||
func PodFitsCurrentNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) bool {
|
||||
errors := NodeFit(nodeIndexer, pod, node)
|
||||
if len(errors) == 0 {
|
||||
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
return true
|
||||
} else {
|
||||
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
|
||||
for _, err := range errors {
|
||||
klog.V(4).InfoS(err.Error())
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
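The three helpers above differ only in which nodes they consider; a hedged sketch of the call made by the eviction NodeFit option follows (variable names are illustrative).

// Hypothetical sketch: an evicted pod must fit somewhere other than its
// current node, so the evictor uses PodFitsAnyOtherNode; PodFitsAnyNode and
// PodFitsCurrentNode widen or narrow the candidate set to all nodes or the
// current one.
if nodeutil.PodFitsAnyOtherNode(getPodsAssignedToNode, pod, readyNodes) {
    // eviction is safe with respect to scheduling constraints
}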
||||
|
||||
// IsNodeUnschedulable checks if the node is unschedulable. This is a helper function to check only in case of
|
||||
// IsNodeUnschedulable checks if the node is unschedulable. This is helper function to check only in case of
|
||||
// underutilized node so that they won't be accounted for.
|
||||
func IsNodeUnschedulable(node *v1.Node) bool {
|
||||
return node.Spec.Unschedulable
|
||||
}
|
||||
|
||||
// fitsRequest determines if a pod can fit on a node based on its resource requests. It returns true if
|
||||
// the pod will fit.
|
||||
func fitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, []error) {
|
||||
var insufficientResources []error
|
||||
// PodFitsAnyNode checks if the given pod fits any of the given nodes, based on
|
||||
// multiple criteria, such as the pod's node selector matching the node labels and
|
||||
// whether the node is schedulable.
|
||||
func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool {
|
||||
for _, node := range nodes {
|
||||
|
||||
// Get pod requests
|
||||
podRequests, _ := utils.PodRequestsAndLimits(pod)
|
||||
resourceNames := make([]v1.ResourceName, 0, len(podRequests))
|
||||
for name := range podRequests {
|
||||
resourceNames = append(resourceNames, name)
|
||||
ok, err := utils.PodMatchNodeSelector(pod, node)
|
||||
if err != nil || !ok {
|
||||
continue
|
||||
}
|
||||
if !IsNodeUnschedulable(node) {
|
||||
klog.V(2).Infof("Pod %v can possibly be scheduled on %v", pod.Name, node.Name)
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// PodFitsCurrentNode checks if the given pod fits on the given node if the pod
|
||||
// node selector matches the node label.
|
||||
func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool {
|
||||
ok, err := utils.PodMatchNodeSelector(pod, node)
|
||||
|
||||
availableResources, err := nodeAvailableResources(nodeIndexer, node, resourceNames)
|
||||
if err != nil {
|
||||
return false, []error{err}
|
||||
}
|
||||
|
||||
podFitsOnNode := true
|
||||
for _, resource := range resourceNames {
|
||||
podResourceRequest := podRequests[resource]
|
||||
availableResource, ok := availableResources[resource]
|
||||
if !ok || podResourceRequest.MilliValue() > availableResource.MilliValue() {
|
||||
insufficientResources = append(insufficientResources, fmt.Errorf("insufficient %v", resource))
|
||||
podFitsOnNode = false
|
||||
}
|
||||
}
|
||||
return podFitsOnNode, insufficientResources
|
||||
}
|
||||
|
||||
// nodeAvailableResources returns resources mapped to the quantity available on the node.
|
||||
func nodeAvailableResources(nodeIndexer podutil.GetPodsAssignedToNodeFunc, node *v1.Node, resourceNames []v1.ResourceName) (map[v1.ResourceName]*resource.Quantity, error) {
|
||||
podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
nodeUtilization := NodeUtilization(podsOnNode, resourceNames)
|
||||
remainingResources := map[v1.ResourceName]*resource.Quantity{
|
||||
v1.ResourceCPU: resource.NewMilliQuantity(node.Status.Allocatable.Cpu().MilliValue()-nodeUtilization[v1.ResourceCPU].MilliValue(), resource.DecimalSI),
|
||||
v1.ResourceMemory: resource.NewQuantity(node.Status.Allocatable.Memory().Value()-nodeUtilization[v1.ResourceMemory].Value(), resource.BinarySI),
|
||||
v1.ResourcePods: resource.NewQuantity(node.Status.Allocatable.Pods().Value()-nodeUtilization[v1.ResourcePods].Value(), resource.DecimalSI),
|
||||
}
|
||||
for _, name := range resourceNames {
|
||||
if !IsBasicResource(name) {
|
||||
if _, exists := node.Status.Allocatable[name]; exists {
|
||||
allocatableResource := node.Status.Allocatable[name]
|
||||
remainingResources[name] = resource.NewQuantity(allocatableResource.Value()-nodeUtilization[name].Value(), resource.DecimalSI)
|
||||
} else {
|
||||
remainingResources[name] = resource.NewQuantity(0, resource.DecimalSI)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return remainingResources, nil
|
||||
}
|
||||
|
||||
// NodeUtilization returns the resources requested by the given pods. Only resources supplied in the resourceNames parameter are calculated.
|
||||
func NodeUtilization(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
|
||||
totalReqs := map[v1.ResourceName]*resource.Quantity{
|
||||
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
|
||||
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
|
||||
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
|
||||
}
|
||||
for _, name := range resourceNames {
|
||||
if !IsBasicResource(name) {
|
||||
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
|
||||
}
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
req, _ := utils.PodRequestsAndLimits(pod)
|
||||
for _, name := range resourceNames {
|
||||
quantity, ok := req[name]
|
||||
if ok && name != v1.ResourcePods {
|
||||
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
|
||||
// the format of the quantity will be updated to the format of y.
|
||||
totalReqs[name].Add(quantity)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return totalReqs
|
||||
}
|
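A small sketch of how nodeAvailableResources and NodeUtilization might be combined by a caller; the indexer and node variables are assumptions for illustration.

// Hypothetical sketch, not part of this diff:
resourceNames := []v1.ResourceName{v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods}
available, err := nodeAvailableResources(getPodsAssignedToNode, node, resourceNames)
if err != nil {
    return err
}
klog.V(4).InfoS("Remaining allocatable capacity",
    "node", klog.KObj(node),
    "cpu", available[v1.ResourceCPU].String(),
    "memory", available[v1.ResourceMemory].String())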
||||
|
||||
// IsBasicResource checks if resource is basic native.
|
||||
func IsBasicResource(name v1.ResourceName) bool {
|
||||
switch name {
|
||||
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
|
||||
return true
|
||||
default:
|
||||
klog.Error(err)
|
||||
return false
|
||||
}
|
||||
|
||||
if !ok {
|
||||
klog.V(1).Infof("Pod %v does not fit on node %v", pod.Name, node.Name)
|
||||
return false
|
||||
}
|
||||
|
||||
klog.V(3).Infof("Pod %v fits on node %v", pod.Name, node.Name)
|
||||
return true
|
||||
}
|
||||
|
||||
@@ -20,13 +20,10 @@ import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
@@ -72,12 +69,12 @@ func TestReadyNodesWithNodeSelector(t *testing.T) {
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
nodeInformer := sharedInformerFactory.Core().V1().Nodes()
|
||||
|
||||
stopChannel := make(chan struct{})
|
||||
stopChannel := make(chan struct{}, 0)
|
||||
sharedInformerFactory.Start(stopChannel)
|
||||
sharedInformerFactory.WaitForCacheSync(stopChannel)
|
||||
defer close(stopChannel)
|
||||
|
||||
nodes, _ := ReadyNodes(ctx, fakeClient, nodeInformer, nodeSelector)
|
||||
nodes, _ := ReadyNodes(ctx, fakeClient, nodeInformer, nodeSelector, nil)
|
||||
|
||||
if nodes[0].Name != "node1" {
|
||||
t.Errorf("Expected node1, got %s", nodes[0].Name)
|
||||
@@ -150,13 +147,13 @@ func TestPodFitsCurrentNode(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
node: &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
@@ -184,582 +181,217 @@ func TestPodFitsCurrentNode(t *testing.T) {
|
||||
},
|
||||
},
|
||||
},
|
||||
node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
node: &v1.Node{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
},
|
||||
},
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
objs = append(objs, tc.node)
|
||||
objs = append(objs, tc.pod)
|
||||
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
actual := PodFitsCurrentNode(getPodsAssignedToNode, tc.pod, tc.node)
|
||||
if actual != tc.success {
|
||||
t.Errorf("Test %#v failed", tc.description)
|
||||
}
|
||||
})
|
||||
actual := PodFitsCurrentNode(tc.pod, tc.node)
|
||||
if actual != tc.success {
|
||||
t.Errorf("Test %#v failed", tc.description)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestPodFitsAnyOtherNode(t *testing.T) {
|
||||
func TestPodFitsAnyNode(t *testing.T) {
|
||||
|
||||
nodeLabelKey := "kubernetes.io/desiredNode"
|
||||
nodeLabelValue := "yes"
|
||||
nodeTaintKey := "hardware"
|
||||
nodeTaintValue := "gpu"
|
||||
|
||||
// Staging node has no scheduling restrictions, but the pod always starts here and PodFitsAnyOtherNode() doesn't take into account the node the pod is running on.
|
||||
nodeNames := []string{"node1", "node2", "stagingNode"}
|
||||
|
||||
tests := []struct {
|
||||
description string
|
||||
pod *v1.Pod
|
||||
nodes []*v1.Node
|
||||
success bool
|
||||
podsOnNodes []*v1.Pod
|
||||
}{
|
||||
{
|
||||
description: "Pod fits another node matching node affinity",
|
||||
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
description: "Pod expected to fit one of the nodes",
|
||||
pod: &v1.Pod{
|
||||
Spec: v1.PodSpec{
|
||||
Affinity: &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: nodeLabelKey,
|
||||
Operator: "In",
|
||||
Values: []string{
|
||||
nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: true,
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
description: "Pod expected to fit one of the nodes",
|
||||
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
description: "Pod expected to fit one of the nodes (error node first)",
|
||||
pod: &v1.Pod{
|
||||
Spec: v1.PodSpec{
|
||||
Affinity: &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: nodeLabelKey,
|
||||
Operator: "In",
|
||||
Values: []string{
|
||||
nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: true,
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
description: "Pod expected to fit none of the nodes",
|
||||
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "unfit1",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "unfit2",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
pod: &v1.Pod{
|
||||
Spec: v1.PodSpec{
|
||||
Affinity: &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: nodeLabelKey,
|
||||
Operator: "In",
|
||||
Values: []string{
|
||||
nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: false,
|
||||
nodes: []*v1.Node{
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: "unfit1",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
nodeLabelKey: "unfit2",
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Nodes are unschedulable but labels match, should fail",
|
||||
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Unschedulable = true
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: "no",
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Both nodes are tained, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
pod: &v1.Pod{
|
||||
Spec: v1.PodSpec{
|
||||
Affinity: &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: nodeLabelKey,
|
||||
Operator: "In",
|
||||
Values: []string{
|
||||
nodeLabelValue,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
},
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, there is a pod on the available node, and requests are low, should pass",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("test-pod", 12*1000, 20*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(40*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, but CPU requests are too big, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node only has 4 cores, the pod already on the node below requests 3 cores, and the pod above requests 2 cores
|
||||
test.BuildTestNode(nodeNames[1], 4000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("3-core-pod", 3000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
},
|
||||
Spec: v1.NodeSpec{
|
||||
Unschedulable: true,
|
||||
},
|
||||
},
|
||||
{
|
||||
ObjectMeta: metav1.ObjectMeta{
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
nodeLabelKey: "no",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, but memory requests are too big, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 5*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node only has 8GB of memory, the pod already on the node below requests 4GB, and the pod above requests 5GB
|
||||
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("4GB-mem-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, but ephemeral storage requests are too big, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 4*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node only has 20GB of storage, the pod already on the node below requests 11GB, and the pod above requests 10GB
|
||||
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(20*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("11GB-storage-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(11*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, but custom resource requests are too big, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
|
||||
}),
|
||||
// Notice that this node only has 15 of the custom resource, the pod already on the node below requests 10, and the pod above requests 10
|
||||
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("10-custom-resource-pod", 0, 0, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
|
||||
}),
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, and pod Overhead is low enough, should pass",
|
||||
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node has 5 CPU cores, the pod below requests 2 cores and has a CPU overhead of 1 core, and the pod above requests 1 core
|
||||
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
pod.Spec.Overhead = createResourceList(1000, 1000*1000*1000, 1000*1000*1000)
|
||||
}),
|
||||
},
|
||||
success: true,
|
||||
},
|
||||
{
|
||||
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, but pod Overhead is too high, should fail",
|
||||
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: nodeTaintKey,
|
||||
Value: nodeTaintValue,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}),
|
||||
// Notice that this node only has 5 CPU cores, the pod below requests 2 cores, but has CPU overhead of 2 cores, and the pod above requests 2 cores
|
||||
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeLabelKey: nodeLabelValue,
|
||||
}
|
||||
|
||||
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
|
||||
}),
|
||||
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
|
||||
},
|
||||
podsOnNodes: []*v1.Pod{
|
||||
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
|
||||
pod.ObjectMeta = metav1.ObjectMeta{
|
||||
Namespace: "test",
|
||||
Labels: map[string]string{
|
||||
"test": "true",
|
||||
},
|
||||
}
|
||||
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
|
||||
pod.Spec.Overhead = createResourceList(2000, 1000*1000*1000, 1000*1000*1000)
|
||||
}),
|
||||
},
|
||||
},
|
||||
},
|
||||
success: false,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range tc.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range tc.podsOnNodes {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
objs = append(objs, tc.pod)
|
||||
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
actual := PodFitsAnyOtherNode(getPodsAssignedToNode, tc.pod, tc.nodes)
|
||||
if actual != tc.success {
|
||||
t.Errorf("Test %#v failed", tc.description)
|
||||
}
|
||||
})
|
||||
actual := PodFitsAnyNode(tc.pod, tc.nodes)
|
||||
if actual != tc.success {
|
||||
t.Errorf("Test %#v failed", tc.description)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// createResourceList builds a small resource list of core resources
|
||||
func createResourceList(cpu int64, memory int64, ephemeralStorage int64) v1.ResourceList {
|
||||
resourceList := make(map[v1.ResourceName]resource.Quantity)
|
||||
resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(cpu, resource.DecimalSI)
|
||||
resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.DecimalSI)
|
||||
resourceList[v1.ResourceEphemeralStorage] = *resource.NewQuantity(ephemeralStorage, resource.DecimalSI)
|
||||
return resourceList
|
||||
}
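// A minimal usage sketch of the helper above (assuming the test.BuildTestPod helper
// used elsewhere in this change): attach RuntimeClass overhead of 1 CPU (1000m),
// 1GB memory and 1GB ephemeral storage to a test pod.
overheadPod := test.BuildTestPod("overhead-pod", 500, 1*1000*1000*1000, "node1", func(pod *v1.Pod) {
	pod.Spec.Overhead = createResourceList(1000, 1000*1000*1000, 1000*1000*1000)
})
_ = overheadPod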
|
||||
|
||||
@@ -17,180 +17,65 @@ limitations under the License.
|
||||
package pod
|
||||
|
||||
import (
|
||||
"sort"
|
||||
|
||||
"context"
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/labels"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
coreinformers "k8s.io/client-go/informers/core/v1"
|
||||
"k8s.io/client-go/tools/cache"
|
||||
|
||||
"k8s.io/apimachinery/pkg/fields"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
)
|
||||
|
||||
const (
|
||||
nodeNameKeyIndex = "spec.nodeName"
|
||||
evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict"
|
||||
)
|
||||
|
||||
// FilterFunc is a filter for a pod.
|
||||
type FilterFunc func(*v1.Pod) bool
|
||||
|
||||
// GetPodsAssignedToNodeFunc is a function which accepts a node name and a pod filter function
|
||||
// as input and returns the pods that are assigned to the node.
|
||||
type GetPodsAssignedToNodeFunc func(string, FilterFunc) ([]*v1.Pod, error)
|
||||
|
||||
// WrapFilterFuncs wraps a set of FilterFunc in one.
|
||||
func WrapFilterFuncs(filters ...FilterFunc) FilterFunc {
|
||||
return func(pod *v1.Pod) bool {
|
||||
for _, filter := range filters {
|
||||
if filter != nil && !filter(pod) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
// IsEvictable checks if a pod is evictable or not.
|
||||
func IsEvictable(pod *v1.Pod, evictLocalStoragePods bool) bool {
|
||||
ownerRefList := OwnerRef(pod)
|
||||
if !HaveEvictAnnotation(pod) && (IsMirrorPod(pod) || (!evictLocalStoragePods && IsPodWithLocalStorage(pod)) || len(ownerRefList) == 0 || IsDaemonsetPod(ownerRefList) || IsCriticalPod(pod)) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
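// A minimal sketch of the annotation override described above, assuming the test
// helpers used later in this change (test.BuildTestPod, test.GetMirrorPodAnnotation):
// a mirror pod is normally skipped, but the evict annotation forces it through.
mirror := test.BuildTestPod("mirror", 100, 0, "node1", func(pod *v1.Pod) {
	pod.Annotations = test.GetMirrorPodAnnotation()
})
_ = IsEvictable(mirror, false) // false: mirror pods are not evictable by default
mirror.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
_ = IsEvictable(mirror, false) // true: the evict annotation overrides the check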
|
||||
|
||||
type Options struct {
|
||||
filter FilterFunc
|
||||
includedNamespaces sets.String
|
||||
excludedNamespaces sets.String
|
||||
labelSelector *metav1.LabelSelector
|
||||
}
|
||||
|
||||
// NewOptions returns an empty Options.
|
||||
func NewOptions() *Options {
|
||||
return &Options{}
|
||||
}
|
||||
|
||||
// WithFilter sets a pod filter.
|
||||
// The filter function should return true if the pod should be returned from ListPodsOnANode
|
||||
func (o *Options) WithFilter(filter FilterFunc) *Options {
|
||||
o.filter = filter
|
||||
return o
|
||||
}
|
||||
|
||||
// WithNamespaces sets included namespaces
|
||||
func (o *Options) WithNamespaces(namespaces sets.String) *Options {
|
||||
o.includedNamespaces = namespaces
|
||||
return o
|
||||
}
|
||||
|
||||
// WithoutNamespaces sets excluded namespaces
|
||||
func (o *Options) WithoutNamespaces(namespaces sets.String) *Options {
|
||||
o.excludedNamespaces = namespaces
|
||||
return o
|
||||
}
|
||||
|
||||
// WithLabelSelector sets a pod label selector
|
||||
func (o *Options) WithLabelSelector(labelSelector *metav1.LabelSelector) *Options {
|
||||
o.labelSelector = labelSelector
|
||||
return o
|
||||
}
|
||||
|
||||
// BuildFilterFunc builds a final FilterFunc based on Options.
|
||||
func (o *Options) BuildFilterFunc() (FilterFunc, error) {
|
||||
var s labels.Selector
|
||||
var err error
|
||||
if o.labelSelector != nil {
|
||||
s, err = metav1.LabelSelectorAsSelector(o.labelSelector)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
return func(pod *v1.Pod) bool {
|
||||
if o.filter != nil && !o.filter(pod) {
|
||||
return false
|
||||
}
|
||||
if len(o.includedNamespaces) > 0 && !o.includedNamespaces.Has(pod.Namespace) {
|
||||
return false
|
||||
}
|
||||
if len(o.excludedNamespaces) > 0 && o.excludedNamespaces.Has(pod.Namespace) {
|
||||
return false
|
||||
}
|
||||
if s != nil && !s.Matches(labels.Set(pod.GetLabels())) {
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}, nil
|
||||
}
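// A usage sketch of the Options builder above, mirroring how the strategies in this
// change compose their pod filters; evictable.IsEvictable stands in for whatever
// filter the caller passes and is otherwise an assumption.
podFilter, err := NewOptions().
	WithFilter(evictable.IsEvictable).
	WithNamespaces(sets.NewString("dev")).
	WithLabelSelector(&metav1.LabelSelector{MatchLabels: map[string]string{"app": "web"}}).
	BuildFilterFunc()
if err != nil {
	// an invalid label selector is the only error path in the code above
}
_ = podFilter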
|
||||
|
||||
// BuildGetPodsAssignedToNodeFunc establishes an indexer to map the pods and their assigned nodes.
|
||||
// It returns a function to help us get all the pods that are assigned to a node based on the indexer.
|
||||
func BuildGetPodsAssignedToNodeFunc(podInformer coreinformers.PodInformer) (GetPodsAssignedToNodeFunc, error) {
|
||||
// Establish an indexer to map the pods and their assigned nodes.
|
||||
err := podInformer.Informer().AddIndexers(cache.Indexers{
|
||||
nodeNameKeyIndex: func(obj interface{}) ([]string, error) {
|
||||
pod, ok := obj.(*v1.Pod)
|
||||
if !ok {
|
||||
return []string{}, nil
|
||||
}
|
||||
if len(pod.Spec.NodeName) == 0 {
|
||||
return []string{}, nil
|
||||
}
|
||||
return []string{pod.Spec.NodeName}, nil
|
||||
},
|
||||
})
|
||||
// ListEvictablePodsOnNode returns the list of evictable pods on node.
|
||||
func ListEvictablePodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, evictLocalStoragePods bool) ([]*v1.Pod, error) {
|
||||
pods, err := ListPodsOnANode(ctx, client, node)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return []*v1.Pod{}, err
|
||||
}
|
||||
|
||||
// The indexer helps us get all the pods that assigned to a node.
|
||||
podIndexer := podInformer.Informer().GetIndexer()
|
||||
getPodsAssignedToNode := func(nodeName string, filter FilterFunc) ([]*v1.Pod, error) {
|
||||
objs, err := podIndexer.ByIndex(nodeNameKeyIndex, nodeName)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
evictablePods := make([]*v1.Pod, 0)
|
||||
for _, pod := range pods {
|
||||
if !IsEvictable(pod, evictLocalStoragePods) {
|
||||
continue
|
||||
} else {
|
||||
evictablePods = append(evictablePods, pod)
|
||||
}
|
||||
pods := make([]*v1.Pod, 0, len(objs))
|
||||
for _, obj := range objs {
|
||||
pod, ok := obj.(*v1.Pod)
|
||||
if !ok {
|
||||
continue
|
||||
}
|
||||
if filter(pod) {
|
||||
pods = append(pods, pod)
|
||||
}
|
||||
}
|
||||
return pods, nil
|
||||
}
|
||||
return getPodsAssignedToNode, nil
|
||||
return evictablePods, nil
|
||||
}
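// A usage sketch, assuming a clientset (for example the fake one used in the tests
// later in this change): register the spec.nodeName indexer first, then start the
// informers and query pods per node through the returned function.
ctx := context.Background()
sharedInformerFactory := informers.NewSharedInformerFactory(client, 0)
getPodsAssignedToNode, err := BuildGetPodsAssignedToNodeFunc(sharedInformerFactory.Core().V1().Pods())
if err != nil {
	// the spec.nodeName indexer could not be registered
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
pods, err := getPodsAssignedToNode("n1", func(pod *v1.Pod) bool { return true })
_, _ = pods, err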
|
||||
|
||||
// ListPodsOnANode lists all pods on a node.
|
||||
// It also accepts a "filter" function which can be used to further limit the pods that are returned.
|
||||
// (Usually this is podEvictor.Evictable().IsEvictable, in order to only list the evictable pods on a node, but can
|
||||
// be used by strategies to extend it if there are further restrictions, such as with NodeAffinity).
|
||||
func ListPodsOnANode(
|
||||
nodeName string,
|
||||
getPodsAssignedToNode GetPodsAssignedToNodeFunc,
|
||||
filter FilterFunc,
|
||||
) ([]*v1.Pod, error) {
|
||||
// Succeeded and failed pods are not considered because they don't occupy any resource.
|
||||
f := func(pod *v1.Pod) bool {
|
||||
return pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed
|
||||
}
|
||||
return ListAllPodsOnANode(nodeName, getPodsAssignedToNode, WrapFilterFuncs(f, filter))
|
||||
}
|
||||
|
||||
// ListAllPodsOnANode lists all the pods on a node no matter what the phase of the pod is.
|
||||
func ListAllPodsOnANode(
|
||||
nodeName string,
|
||||
getPodsAssignedToNode GetPodsAssignedToNodeFunc,
|
||||
filter FilterFunc,
|
||||
) ([]*v1.Pod, error) {
|
||||
pods, err := getPodsAssignedToNode(nodeName, filter)
|
||||
func ListPodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node) ([]*v1.Pod, error) {
|
||||
fieldSelector, err := fields.ParseSelector("spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed))
|
||||
if err != nil {
|
||||
return []*v1.Pod{}, err
|
||||
}
|
||||
|
||||
podList, err := client.CoreV1().Pods(v1.NamespaceAll).List(ctx,
|
||||
metav1.ListOptions{FieldSelector: fieldSelector.String()})
|
||||
if err != nil {
|
||||
return []*v1.Pod{}, err
|
||||
}
|
||||
|
||||
pods := make([]*v1.Pod, 0)
|
||||
for i := range podList.Items {
|
||||
pods = append(pods, &podList.Items[i])
|
||||
}
|
||||
return pods, nil
|
||||
}
|
||||
|
||||
// OwnerRef returns the ownerRefList for the pod.
|
||||
func OwnerRef(pod *v1.Pod) []metav1.OwnerReference {
|
||||
return pod.ObjectMeta.GetOwnerReferences()
|
||||
func IsCriticalPod(pod *v1.Pod) bool {
|
||||
return utils.IsCriticalPod(pod)
|
||||
}
|
||||
|
||||
func IsBestEffortPod(pod *v1.Pod) bool {
|
||||
@@ -205,26 +90,37 @@ func IsGuaranteedPod(pod *v1.Pod) bool {
|
||||
return utils.GetPodQOS(pod) == v1.PodQOSGuaranteed
|
||||
}
|
||||
|
||||
// SortPodsBasedOnPriorityLowToHigh sorts pods based on their priorities from low to high.
|
||||
// If pods have same priorities, they will be sorted by QoS in the following order:
|
||||
// BestEffort, Burstable, Guaranteed
|
||||
func SortPodsBasedOnPriorityLowToHigh(pods []*v1.Pod) {
|
||||
sort.Slice(pods, func(i, j int) bool {
|
||||
if pods[i].Spec.Priority == nil && pods[j].Spec.Priority != nil {
|
||||
func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool {
|
||||
for _, ownerRef := range ownerRefList {
|
||||
if ownerRef.Kind == "DaemonSet" {
|
||||
return true
|
||||
}
|
||||
if pods[j].Spec.Priority == nil && pods[i].Spec.Priority != nil {
|
||||
return false
|
||||
}
|
||||
if (pods[j].Spec.Priority == nil && pods[i].Spec.Priority == nil) || (*pods[i].Spec.Priority == *pods[j].Spec.Priority) {
|
||||
if IsBestEffortPod(pods[i]) {
|
||||
return true
|
||||
}
|
||||
if IsBurstablePod(pods[i]) && IsGuaranteedPod(pods[j]) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
return *pods[i].Spec.Priority < *pods[j].Spec.Priority
|
||||
})
|
||||
}
|
||||
return false
|
||||
}
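// A minimal sketch of the ordering documented above, using the test helpers that
// appear later in this change; after sorting, the lower-priority pod comes first and
// the high-priority Guaranteed pod comes last.
low, high := int32(0), int32(10000)
bestEffort := test.BuildTestPod("best-effort", 400, 0, "n1", func(pod *v1.Pod) {
	test.SetPodPriority(pod, high)
	test.MakeBestEffortPod(pod)
})
guaranteed := test.BuildTestPod("guaranteed", 400, 100, "n1", func(pod *v1.Pod) {
	test.SetPodPriority(pod, high)
	test.MakeGuaranteedPod(pod)
})
lowPrio := test.BuildTestPod("low-priority", 400, 0, "n1", func(pod *v1.Pod) {
	test.SetPodPriority(pod, low)
})
podList := []*v1.Pod{guaranteed, bestEffort, lowPrio}
SortPodsBasedOnPriorityLowToHigh(podList)
// expected order: lowPrio, bestEffort, guaranteed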
|
||||
|
||||
// IsMirrorPod checks whether the pod is a mirror pod.
|
||||
func IsMirrorPod(pod *v1.Pod) bool {
|
||||
return utils.IsMirrorPod(pod)
|
||||
}
|
||||
|
||||
// HaveEvictAnnotation checks whether the pod has the evict annotation
|
||||
func HaveEvictAnnotation(pod *v1.Pod) bool {
|
||||
_, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey]
|
||||
return found
|
||||
}
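// A tiny sketch: the annotation key checked above is the same one set on pods
// throughout the tests in this change.
annotated := test.BuildTestPod("annotated", 100, 0, "n1", func(pod *v1.Pod) {
	pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
})
_ = HaveEvictAnnotation(annotated) // true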
|
||||
|
||||
func IsPodWithLocalStorage(pod *v1.Pod) bool {
|
||||
for _, volume := range pod.Spec.Volumes {
|
||||
if volume.HostPath != nil || volume.EmptyDir != nil {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// OwnerRef returns the ownerRefList for the pod.
|
||||
func OwnerRef(pod *v1.Pod) []metav1.OwnerReference {
|
||||
return pod.ObjectMeta.GetOwnerReferences()
|
||||
}
|
||||
|
||||
@@ -17,140 +17,221 @@ limitations under the License.
|
||||
package pod
|
||||
|
||||
import (
|
||||
"context"
|
||||
"reflect"
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
var (
|
||||
lowPriority = int32(0)
|
||||
highPriority = int32(10000)
|
||||
)
|
||||
func TestIsEvictable(t *testing.T) {
|
||||
n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil)
|
||||
type testCase struct {
|
||||
pod *v1.Pod
|
||||
runBefore func(*v1.Pod)
|
||||
evictLocalStoragePods bool
|
||||
result bool
|
||||
}
|
||||
|
||||
func TestListPodsOnANode(t *testing.T) {
|
||||
testCases := []struct {
|
||||
name string
|
||||
pods []*v1.Pod
|
||||
node *v1.Node
|
||||
labelSelector *metav1.LabelSelector
|
||||
expectedPodCount int
|
||||
}{
|
||||
testCases := []testCase{
|
||||
{
|
||||
name: "test listing pods on a node",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("pod1", 100, 0, "n1", nil),
|
||||
test.BuildTestPod("pod2", 100, 0, "n1", nil),
|
||||
test.BuildTestPod("pod3", 100, 0, "n2", nil),
|
||||
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
},
|
||||
node: test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
labelSelector: nil,
|
||||
expectedPodCount: 2,
|
||||
},
|
||||
{
|
||||
name: "test listing pods with label selector",
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("pod1", 100, 0, "n1", nil),
|
||||
test.BuildTestPod("pod2", 100, 0, "n1", func(pod *v1.Pod) {
|
||||
pod.Labels = map[string]string{"foo": "bar"}
|
||||
}),
|
||||
test.BuildTestPod("pod3", 100, 0, "n1", func(pod *v1.Pod) {
|
||||
pod.Labels = map[string]string{"foo": "bar1"}
|
||||
}),
|
||||
test.BuildTestPod("pod4", 100, 0, "n2", nil),
|
||||
evictLocalStoragePods: false,
|
||||
result: true,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
},
|
||||
node: test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
labelSelector: &metav1.LabelSelector{
|
||||
MatchExpressions: []metav1.LabelSelectorRequirement{
|
||||
evictLocalStoragePods: false,
|
||||
result: true,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: true,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: true,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Key: "foo",
|
||||
Operator: metav1.LabelSelectorOpIn,
|
||||
Values: []string{"bar", "bar1"},
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
expectedPodCount: 2,
|
||||
evictLocalStoragePods: false,
|
||||
result: false,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
evictLocalStoragePods: true,
|
||||
result: true,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: true,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: false,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
|
||||
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: true,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: false,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: true,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: false,
|
||||
}, {
|
||||
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil),
|
||||
runBefore: func(pod *v1.Pod) {
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
pod.Annotations = map[string]string{
|
||||
"descheduler.alpha.kubernetes.io/evict": "true",
|
||||
}
|
||||
},
|
||||
evictLocalStoragePods: false,
|
||||
result: true,
|
||||
},
|
||||
}
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
objs = append(objs, testCase.node)
|
||||
for _, pod := range testCase.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
for _, test := range testCases {
|
||||
test.runBefore(test.pod)
|
||||
result := IsEvictable(test.pod, test.evictLocalStoragePods)
|
||||
if result != test.result {
|
||||
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
|
||||
}
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
filter, err := NewOptions().WithLabelSelector(testCase.labelSelector).BuildFilterFunc()
|
||||
if err != nil {
|
||||
t.Errorf("Build filter function error: %v", err)
|
||||
}
|
||||
|
||||
pods, _ := ListPodsOnANode(testCase.node.Name, getPodsAssignedToNode, filter)
|
||||
if len(pods) != testCase.expectedPodCount {
|
||||
t.Errorf("Expected %v pods on node %v, got %+v", testCase.expectedPodCount, testCase.node.Name, len(pods))
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
func TestPodTypes(t *testing.T) {
|
||||
n1 := test.BuildTestNode("node1", 1000, 2000, 9, nil)
|
||||
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, nil)
|
||||
|
||||
func TestSortPodsBasedOnPriorityLowToHigh(t *testing.T) {
|
||||
n1 := test.BuildTestNode("n1", 4000, 3000, 9, nil)
|
||||
// These won't be evicted.
|
||||
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil)
|
||||
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil)
|
||||
p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil)
|
||||
p5 := test.BuildTestPod("p5", 400, 0, n1.Name, nil)
|
||||
p6 := test.BuildTestPod("p6", 400, 0, n1.Name, nil)
|
||||
|
||||
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
test.SetPodPriority(pod, lowPriority)
|
||||
})
|
||||
p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
|
||||
// BestEffort
|
||||
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
test.SetPodPriority(pod, highPriority)
|
||||
test.MakeBestEffortPod(pod)
|
||||
})
|
||||
|
||||
// Burstable
|
||||
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
test.SetPodPriority(pod, highPriority)
|
||||
test.MakeBurstablePod(pod)
|
||||
})
|
||||
|
||||
// Guaranteed
|
||||
p4 := test.BuildTestPod("p4", 400, 100, n1.Name, func(pod *v1.Pod) {
|
||||
test.SetPodPriority(pod, highPriority)
|
||||
test.MakeGuaranteedPod(pod)
|
||||
})
|
||||
|
||||
// Best effort with nil priorities.
|
||||
p5 := test.BuildTestPod("p5", 400, 100, n1.Name, test.MakeBestEffortPod)
|
||||
p5.Spec.Priority = nil
|
||||
|
||||
p6 := test.BuildTestPod("p6", 400, 100, n1.Name, test.MakeGuaranteedPod)
|
||||
p6.Spec.Priority = nil
|
||||
|
||||
podList := []*v1.Pod{p4, p3, p2, p1, p6, p5}
|
||||
|
||||
SortPodsBasedOnPriorityLowToHigh(podList)
|
||||
if !reflect.DeepEqual(podList[len(podList)-1], p4) {
|
||||
t.Errorf("Expected last pod in sorted list to be %v which of highest priority and guaranteed but got %v", p4, podList[len(podList)-1])
|
||||
p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
// The following 4 pods won't get evicted.
|
||||
// A daemonset.
|
||||
//p2.Annotations = test.GetDaemonSetAnnotation()
|
||||
p2.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
|
||||
// A pod with local storage.
|
||||
p3.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
p3.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
p4.Annotations = test.GetMirrorPodAnnotation()
|
||||
// A Critical Pod.
|
||||
p5.Namespace = "kube-system"
|
||||
priority := utils.SystemCriticalPriority
|
||||
p5.Spec.Priority = &priority
|
||||
systemCriticalPriority := utils.SystemCriticalPriority
|
||||
p5.Spec.Priority = &systemCriticalPriority
|
||||
if !IsMirrorPod(p4) {
|
||||
t.Errorf("Expected p4 to be a mirror pod.")
|
||||
}
|
||||
if !IsCriticalPod(p5) {
|
||||
t.Errorf("Expected p5 to be a critical pod.")
|
||||
}
|
||||
if !IsPodWithLocalStorage(p3) {
|
||||
t.Errorf("Expected p3 to be a pod with local storage.")
|
||||
}
|
||||
ownerRefList := OwnerRef(p2)
|
||||
if !IsDaemonsetPod(ownerRefList) {
|
||||
t.Errorf("Expected p2 to be a daemonset pod.")
|
||||
}
|
||||
ownerRefList = OwnerRef(p1)
|
||||
if IsDaemonsetPod(ownerRefList) || IsPodWithLocalStorage(p1) || IsCriticalPod(p1) || IsMirrorPod(p1) {
|
||||
t.Errorf("Expected p1 to be a normal pod.")
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -21,7 +21,7 @@ import (
|
||||
"io/ioutil"
|
||||
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/klog"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/api/v1alpha1"
|
||||
@@ -30,7 +30,7 @@ import (
|
||||
|
||||
func LoadPolicyConfig(policyConfigFile string) (*api.DeschedulerPolicy, error) {
|
||||
if policyConfigFile == "" {
|
||||
klog.V(1).InfoS("Policy config file not specified")
|
||||
klog.V(1).Infof("policy config file not specified")
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -19,22 +19,9 @@ package scheme
|
||||
import (
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/serializer"
|
||||
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/api/v1alpha1"
|
||||
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
|
||||
componentconfigv1alpha1 "sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
|
||||
)
|
||||
|
||||
var (
|
||||
Scheme = runtime.NewScheme()
|
||||
Codecs = serializer.NewCodecFactory(Scheme)
|
||||
)
|
||||
|
||||
func init() {
|
||||
utilruntime.Must(api.AddToScheme(Scheme))
|
||||
utilruntime.Must(v1alpha1.AddToScheme(Scheme))
|
||||
|
||||
utilruntime.Must(componentconfig.AddToScheme(Scheme))
|
||||
utilruntime.Must(componentconfigv1alpha1.AddToScheme(Scheme))
|
||||
}
|
||||
|
||||
@@ -18,8 +18,6 @@ package strategies
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"reflect"
|
||||
"sort"
|
||||
"strings"
|
||||
@@ -28,34 +26,13 @@ import (
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/klog"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
)
|
||||
|
||||
func validateRemoveDuplicatePodsParams(params *api.StrategyParameters) error {
|
||||
if params == nil {
|
||||
return nil
|
||||
}
|
||||
// At most one of include/exclude can be set
|
||||
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
|
||||
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
|
||||
}
|
||||
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
|
||||
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
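// A hedged sketch of parameters that pass the validation above; the StrategyParameters,
// Namespaces and RemoveDuplicates field names are taken from their use elsewhere in
// this change and are otherwise assumptions.
params := &api.StrategyParameters{
	Namespaces:       &api.Namespaces{Include: []string{"dev"}}, // Include and Exclude are mutually exclusive
	RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}},
}
if err := validateRemoveDuplicatePodsParams(params); err != nil {
	// only one of Include/Exclude and one of thresholdPriority/thresholdPriorityClassName may be set
}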
|
||||
|
||||
type podOwner struct {
|
||||
namespace, kind, name string
|
||||
imagesHash string
|
||||
}
|
||||
|
||||
// RemoveDuplicatePods removes duplicate pods on a node. This strategy evicts all duplicate pods on a node.
|
||||
// A pod is said to be a duplicate of another if both of them are from the same creator and kind, are within the same
|
||||
// namespace, and have at least one container with the same image.
|
||||
@@ -65,230 +42,81 @@ func RemoveDuplicatePods(
|
||||
client clientset.Interface,
|
||||
strategy api.DeschedulerStrategy,
|
||||
nodes []*v1.Node,
|
||||
evictLocalStoragePods bool,
|
||||
podEvictor *evictions.PodEvictor,
|
||||
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
|
||||
) {
|
||||
if err := validateRemoveDuplicatePodsParams(strategy.Params); err != nil {
|
||||
klog.ErrorS(err, "Invalid RemoveDuplicatePods parameters")
|
||||
return
|
||||
}
|
||||
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
|
||||
return
|
||||
}
|
||||
|
||||
var includedNamespaces, excludedNamespaces sets.String
|
||||
if strategy.Params != nil && strategy.Params.Namespaces != nil {
|
||||
includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
|
||||
excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
|
||||
}
|
||||
|
||||
nodeFit := false
|
||||
if strategy.Params != nil {
|
||||
nodeFit = strategy.Params.NodeFit
|
||||
}
|
||||
|
||||
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
|
||||
|
||||
duplicatePods := make(map[podOwner]map[string][]*v1.Pod)
|
||||
ownerKeyOccurence := make(map[podOwner]int32)
|
||||
nodeCount := 0
|
||||
nodeMap := make(map[string]*v1.Node)
|
||||
|
||||
podFilter, err := podutil.NewOptions().
|
||||
WithFilter(evictable.IsEvictable).
|
||||
WithNamespaces(includedNamespaces).
|
||||
WithoutNamespaces(excludedNamespaces).
|
||||
BuildFilterFunc()
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Error initializing pod filter function")
|
||||
return
|
||||
}
|
||||
|
||||
for _, node := range nodes {
|
||||
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
|
||||
pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Error listing evictable pods on node", "node", klog.KObj(node))
|
||||
klog.V(1).Infof("Processing node: %#v", node.Name)
|
||||
duplicatePods := listDuplicatePodsOnANode(ctx, client, node, strategy, evictLocalStoragePods)
|
||||
for _, pod := range duplicatePods {
|
||||
if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil {
|
||||
klog.Errorf("Error evicting pod: (%#v)", err)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// listDuplicatePodsOnANode lists duplicate pods on a given node.
|
||||
// It checks for pods which have the same owner and have at least 1 container with the same image spec
|
||||
func listDuplicatePodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node, strategy api.DeschedulerStrategy, evictLocalStoragePods bool) []*v1.Pod {
|
||||
pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods)
|
||||
if err != nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
duplicatePods := make([]*v1.Pod, 0, len(pods))
|
||||
// Each pod has a list of owners and a list of containers, and each container has 1 image spec.
|
||||
// For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string.
|
||||
// All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness.
|
||||
// This list of keys representing a single pod is then sorted alphabetically.
|
||||
// If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons:
|
||||
// - The 2 pods have the exact same ownerrefs
|
||||
// - The 2 pods have the exact same container images
|
||||
//
|
||||
// duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D-slice of all the other lists where that is the first key
|
||||
// (Since we sort each pod's list, we only need to key the map on the first entry in each list. Any pod that doesn't have
|
||||
// the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed).
|
||||
// If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate.
|
||||
// If not, then we add this pod's list to the list of lists for that key.
|
||||
duplicateKeysMap := map[string][][]string{}
|
||||
for _, pod := range pods {
|
||||
ownerRefList := podutil.OwnerRef(pod)
|
||||
if hasExcludedOwnerRefKind(ownerRefList, strategy) {
|
||||
continue
|
||||
}
|
||||
nodeMap[node.Name] = node
|
||||
nodeCount++
|
||||
// Each pod has a list of owners and a list of containers, and each container has 1 image spec.
|
||||
// For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string.
|
||||
// All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness.
|
||||
// This list of keys representing a single pod is then sorted alphabetically.
|
||||
// If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons:
|
||||
// - The 2 pods have the exact same ownerrefs
|
||||
// - The 2 pods have the exact same container images
|
||||
//
|
||||
// duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D-slice of all the other lists where that is the first key
|
||||
// (Since we sort each pod's list, we only need to key the map on the first entry in each list. Any pod that doesn't have
|
||||
// the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed).
|
||||
// If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate.
|
||||
// If not, then we add this pod's list to the list of lists for that key.
|
||||
duplicateKeysMap := map[string][][]string{}
|
||||
for _, pod := range pods {
|
||||
ownerRefList := podutil.OwnerRef(pod)
|
||||
if hasExcludedOwnerRefKind(ownerRefList, strategy) || len(ownerRefList) == 0 {
|
||||
continue
|
||||
}
|
||||
podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
|
||||
imageList := []string{}
|
||||
podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
|
||||
for _, ownerRef := range ownerRefList {
|
||||
for _, container := range pod.Spec.Containers {
|
||||
imageList = append(imageList, container.Image)
|
||||
}
|
||||
sort.Strings(imageList)
|
||||
imagesHash := strings.Join(imageList, "#")
|
||||
for _, ownerRef := range ownerRefList {
|
||||
ownerKey := podOwner{
|
||||
namespace: pod.ObjectMeta.Namespace,
|
||||
kind: ownerRef.Kind,
|
||||
name: ownerRef.Name,
|
||||
imagesHash: imagesHash,
|
||||
}
|
||||
ownerKeyOccurence[ownerKey] = ownerKeyOccurence[ownerKey] + 1
|
||||
for _, image := range imageList {
|
||||
// Namespace/Kind/Name should be unique for the cluster.
|
||||
// We also consider the image, as 2 pods could have the same owner but serve different purposes
|
||||
// So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod.
|
||||
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, image}, "/")
|
||||
podContainerKeys = append(podContainerKeys, s)
|
||||
}
|
||||
}
|
||||
sort.Strings(podContainerKeys)
|
||||
|
||||
// If there have been any other pods with the same first "key", look through all the lists to see if any match
|
||||
if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok {
|
||||
matched := false
|
||||
for _, keys := range existing {
|
||||
if reflect.DeepEqual(keys, podContainerKeys) {
|
||||
matched = true
|
||||
klog.V(3).InfoS("Duplicate found", "pod", klog.KObj(pod))
|
||||
for _, ownerRef := range ownerRefList {
|
||||
ownerKey := podOwner{
|
||||
namespace: pod.ObjectMeta.Namespace,
|
||||
kind: ownerRef.Kind,
|
||||
name: ownerRef.Name,
|
||||
imagesHash: imagesHash,
|
||||
}
|
||||
if _, ok := duplicatePods[ownerKey]; !ok {
|
||||
duplicatePods[ownerKey] = make(map[string][]*v1.Pod)
|
||||
}
|
||||
duplicatePods[ownerKey][node.Name] = append(duplicatePods[ownerKey][node.Name], pod)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
if !matched {
|
||||
// Found no matches, add this list of keys to the list of lists that have the same first key
|
||||
duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys)
|
||||
}
|
||||
} else {
|
||||
// This is the first pod we've seen that has this first "key" entry
|
||||
duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys}
|
||||
// Namespace/Kind/Name should be unique for the cluster.
|
||||
// We also consider the image, as 2 pods could have the same owner but serve different purposes
|
||||
// So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod.
|
||||
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/")
|
||||
podContainerKeys = append(podContainerKeys, s)
|
||||
}
|
||||
}
|
||||
}
|
||||
sort.Strings(podContainerKeys)
|
||||
|
||||
// 1. how many pods can be evicted to respect uniform placement of pods among viable nodes?
|
||||
for ownerKey, podNodes := range duplicatePods {
|
||||
targetNodes := getTargetNodes(podNodes, nodes)
|
||||
|
||||
klog.V(2).InfoS("Adjusting feasible nodes", "owner", ownerKey, "from", nodeCount, "to", len(targetNodes))
|
||||
if len(targetNodes) < 2 {
|
||||
klog.V(1).InfoS("Less than two feasible nodes for duplicates to land, skipping eviction", "owner", ownerKey)
|
||||
continue
|
||||
}
|
||||
|
||||
upperAvg := int(math.Ceil(float64(ownerKeyOccurence[ownerKey]) / float64(len(targetNodes))))
|
||||
for nodeName, pods := range podNodes {
|
||||
klog.V(2).InfoS("Average occurrence per node", "node", klog.KObj(nodeMap[nodeName]), "ownerKey", ownerKey, "avg", upperAvg)
|
||||
// list of duplicated pods does not contain the original referential pod
|
||||
if len(pods)+1 > upperAvg {
|
||||
// It's assumed all duplicated pods are in the same priority class
|
||||
// TODO(jchaloup): check if the pod has a different node to lend to
|
||||
for _, pod := range pods[upperAvg-1:] {
|
||||
if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName], "RemoveDuplicatePods"); err != nil {
|
||||
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
|
||||
break
|
||||
}
|
||||
// If there have been any other pods with the same first "key", look through all the lists to see if any match
|
||||
if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok {
|
||||
for _, keys := range existing {
|
||||
if reflect.DeepEqual(keys, podContainerKeys) {
|
||||
duplicatePods = append(duplicatePods, pod)
|
||||
break
|
||||
}
|
||||
// Found no matches, add this list of keys to the list of lists that have the same first key
|
||||
duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys)
|
||||
}
|
||||
} else {
|
||||
// This is the first pod we've seen that has this first "key" entry
|
||||
duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func getNodeAffinityNodeSelector(pod *v1.Pod) *v1.NodeSelector {
|
||||
if pod.Spec.Affinity == nil {
|
||||
return nil
|
||||
}
|
||||
if pod.Spec.Affinity.NodeAffinity == nil {
|
||||
return nil
|
||||
}
|
||||
return pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
|
||||
}
|
||||
|
||||
func getTargetNodes(podNodes map[string][]*v1.Pod, nodes []*v1.Node) []*v1.Node {
|
||||
// In order to reduce the number of pods processed, identify pods which have
|
||||
// equal (tolerations, nodeselectors, node affinity) terms and consider them
|
||||
// as identical. Identical pods wrt. (tolerations, nodeselectors, node affinity) terms
|
||||
// will produce the same result when checking if a pod is feasible for a node.
|
||||
// This improves the efficiency of processing the pods marked for eviction.
|
||||
|
||||
// Collect all distinct pods which differ in at least taints, node affinity or node selector terms
|
||||
distinctPods := map[*v1.Pod]struct{}{}
|
||||
for _, pods := range podNodes {
|
||||
for _, pod := range pods {
|
||||
duplicated := false
|
||||
for dp := range distinctPods {
|
||||
if utils.TolerationsEqual(pod.Spec.Tolerations, dp.Spec.Tolerations) &&
|
||||
utils.NodeSelectorsEqual(getNodeAffinityNodeSelector(pod), getNodeAffinityNodeSelector(dp)) &&
|
||||
reflect.DeepEqual(pod.Spec.NodeSelector, dp.Spec.NodeSelector) {
|
||||
duplicated = true
|
||||
continue
|
||||
}
|
||||
}
|
||||
if duplicated {
|
||||
continue
|
||||
}
|
||||
distinctPods[pod] = struct{}{}
|
||||
}
|
||||
}
|
||||
|
||||
// For each distinct pod get a list of nodes where it can land
|
||||
targetNodesMap := map[string]*v1.Node{}
|
||||
for pod := range distinctPods {
|
||||
matchingNodes := map[string]*v1.Node{}
|
||||
for _, node := range nodes {
|
||||
if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
|
||||
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
|
||||
}) {
|
||||
continue
|
||||
}
|
||||
if match, err := utils.PodMatchNodeSelector(pod, node); err == nil && !match {
|
||||
continue
|
||||
}
|
||||
matchingNodes[node.Name] = node
|
||||
}
|
||||
if len(matchingNodes) > 1 {
|
||||
for nodeName := range matchingNodes {
|
||||
targetNodesMap[nodeName] = matchingNodes[nodeName]
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
targetNodes := []*v1.Node{}
|
||||
for _, node := range targetNodesMap {
|
||||
targetNodes = append(targetNodes, node)
|
||||
}
|
||||
|
||||
return targetNodes
|
||||
return duplicatePods
|
||||
}
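// A minimal sketch of the key scheme described in the comments above; podDuplicateKeys
// is a hypothetical helper, not part of this change. Each pod is reduced to a sorted
// list of namespace/kind/name/image keys, and two pods with identical lists are
// treated as duplicates of each other.
func podDuplicateKeys(pod *v1.Pod) []string {
	keys := []string{}
	for _, ownerRef := range podutil.OwnerRef(pod) {
		for _, container := range pod.Spec.Containers {
			keys = append(keys, strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/"))
		}
	}
	sort.Strings(keys)
	return keys
}
// Two pods a and b are duplicates when reflect.DeepEqual(podDuplicateKeys(a), podDuplicateKeys(b)) holds.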
|
||||
|
||||
func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, strategy api.DeschedulerStrategy) bool {
|
||||
if strategy.Params == nil || strategy.Params.RemoveDuplicates == nil {
|
||||
if strategy.Params.RemoveDuplicates == nil {
|
||||
return false
|
||||
}
|
||||
exclude := sets.NewString(strategy.Params.RemoveDuplicates.ExcludeOwnerKinds...)
|
||||
|
||||
@@ -20,96 +20,46 @@ import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
core "k8s.io/client-go/testing"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
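// buildTestPodWithImage builds a ReplicaSet-owned test pod and appends an extra
// container running the given image, so tests can distinguish pods by their container set.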
func buildTestPodWithImage(podName, node, image string) *v1.Pod {
|
||||
pod := test.BuildTestPod(podName, 100, 0, node, test.SetRSOwnerRef)
|
||||
pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{
|
||||
Name: image,
|
||||
Image: image,
|
||||
})
|
||||
return pod
|
||||
}
|
||||
|
||||
func TestFindDuplicatePods(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
// first setup pods
|
||||
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
|
||||
node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
|
||||
node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: "hardware",
|
||||
Value: "gpu",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
})
|
||||
node4 := test.BuildTestNode("n4", 2000, 3000, 10, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
"datacenter": "east",
|
||||
}
|
||||
})
|
||||
node5 := test.BuildTestNode("n5", 2000, 3000, 10, func(node *v1.Node) {
|
||||
node.Spec = v1.NodeSpec{
|
||||
Unschedulable: true,
|
||||
}
|
||||
})
|
||||
node6 := test.BuildTestNode("n6", 200, 200, 10, nil)
|
||||
|
||||
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
|
||||
node := test.BuildTestNode("n1", 2000, 3000, 10, nil)
|
||||
p1 := test.BuildTestPod("p1", 100, 0, node.Name, nil)
|
||||
p1.Namespace = "dev"
|
||||
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
|
||||
p2 := test.BuildTestPod("p2", 100, 0, node.Name, nil)
|
||||
p2.Namespace = "dev"
|
||||
p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
|
||||
p3 := test.BuildTestPod("p3", 100, 0, node.Name, nil)
|
||||
p3.Namespace = "dev"
|
||||
p4 := test.BuildTestPod("p4", 100, 0, node1.Name, nil)
|
||||
p5 := test.BuildTestPod("p5", 100, 0, node1.Name, nil)
|
||||
p6 := test.BuildTestPod("p6", 100, 0, node1.Name, nil)
|
||||
p7 := test.BuildTestPod("p7", 100, 0, node1.Name, nil)
|
||||
p4 := test.BuildTestPod("p4", 100, 0, node.Name, nil)
|
||||
p5 := test.BuildTestPod("p5", 100, 0, node.Name, nil)
|
||||
p6 := test.BuildTestPod("p6", 100, 0, node.Name, nil)
|
||||
p7 := test.BuildTestPod("p7", 100, 0, node.Name, nil)
|
||||
p7.Namespace = "kube-system"
|
||||
p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil)
|
||||
p8 := test.BuildTestPod("p8", 100, 0, node.Name, nil)
|
||||
p8.Namespace = "test"
|
||||
p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil)
|
||||
p9 := test.BuildTestPod("p9", 100, 0, node.Name, nil)
|
||||
p9.Namespace = "test"
|
||||
p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil)
|
||||
p10 := test.BuildTestPod("p10", 100, 0, node.Name, nil)
|
||||
p10.Namespace = "test"
|
||||
p11 := test.BuildTestPod("p11", 100, 0, node1.Name, nil)
|
||||
p11 := test.BuildTestPod("p11", 100, 0, node.Name, nil)
|
||||
p11.Namespace = "different-images"
|
||||
p12 := test.BuildTestPod("p12", 100, 0, node1.Name, nil)
|
||||
p12 := test.BuildTestPod("p12", 100, 0, node.Name, nil)
|
||||
p12.Namespace = "different-images"
|
||||
p13 := test.BuildTestPod("p13", 100, 0, node1.Name, nil)
|
||||
p13 := test.BuildTestPod("p13", 100, 0, node.Name, nil)
|
||||
p13.Namespace = "different-images"
|
||||
p14 := test.BuildTestPod("p14", 100, 0, node1.Name, nil)
|
||||
p14 := test.BuildTestPod("p14", 100, 0, node.Name, nil)
|
||||
p14.Namespace = "different-images"
|
||||
p15 := test.BuildTestPod("p15", 100, 0, node1.Name, nil)
|
||||
p15.Namespace = "node-fit"
|
||||
p16 := test.BuildTestPod("NOT1", 100, 0, node1.Name, nil)
|
||||
p16.Namespace = "node-fit"
|
||||
p17 := test.BuildTestPod("NOT2", 100, 0, node1.Name, nil)
|
||||
p17.Namespace = "node-fit"
|
||||
p18 := test.BuildTestPod("TARGET", 100, 0, node1.Name, nil)
|
||||
p18.Namespace = "node-fit"
|
||||
|
||||
// This pod sits on node6 and is used to take up CPU requests on the node
|
||||
p19 := test.BuildTestPod("CPU-eater", 150, 150, node6.Name, nil)
|
||||
p19.Namespace = "test"
|
||||
|
||||
// Dummy pod for node6 used to do the opposite of p19
|
||||
p20 := test.BuildTestPod("CPU-saver", 100, 150, node6.Name, nil)
|
||||
p20.Namespace = "test"
|
||||
|
||||
// ### Evictable Pods ###
|
||||
|
||||
@@ -163,604 +113,99 @@ func TestFindDuplicatePods(t *testing.T) {
|
||||
Image: "foo",
|
||||
})
|
||||
|
||||
// ### Pods Evictable Based On Node Fit ###
|
||||
|
||||
ownerRef3 := test.GetReplicaSetOwnerRefList()
|
||||
p15.ObjectMeta.OwnerReferences = ownerRef3
|
||||
p16.ObjectMeta.OwnerReferences = ownerRef3
|
||||
p17.ObjectMeta.OwnerReferences = ownerRef3
|
||||
p18.ObjectMeta.OwnerReferences = ownerRef3
|
||||
|
||||
p15.Spec.NodeSelector = map[string]string{
|
||||
"datacenter": "west",
|
||||
}
|
||||
p16.Spec.NodeSelector = map[string]string{
|
||||
"datacenter": "west",
|
||||
}
|
||||
p17.Spec.NodeSelector = map[string]string{
|
||||
"datacenter": "west",
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
pods []*v1.Pod
|
||||
nodes []*v1.Node
|
||||
expectedEvictedPodCount uint
|
||||
maxPodsToEvict int
|
||||
pods []v1.Pod
|
||||
expectedEvictedPodCount int
|
||||
strategy api.DeschedulerStrategy
|
||||
}{
|
||||
{
|
||||
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 1 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet, but ReplicaSet kind is excluded. 0 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}}}},
|
||||
},
|
||||
{
|
||||
description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 1 should be evicted.",
|
||||
pods: []*v1.Pod{p8, p9, p10},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Three Pods in the `dev` Namespace, three Pods in the `test` Namespace. Bound to ReplicaSet with same name. 4 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3, p8, p9, p10},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 2 should be evicted.",
|
||||
maxPodsToEvict: 5,
|
||||
pods: []v1.Pod{*p1, *p2, *p3},
|
||||
expectedEvictedPodCount: 2,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet, but ReplicaSet kind is excluded. 0 should be evicted.",
|
||||
maxPodsToEvict: 5,
|
||||
pods: []v1.Pod{*p1, *p2, *p3},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{Params: api.StrategyParameters{RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}}}},
|
||||
},
|
||||
{
|
||||
description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 2 should be evicted.",
|
||||
maxPodsToEvict: 5,
|
||||
pods: []v1.Pod{*p8, *p9, *p10},
|
||||
expectedEvictedPodCount: 2,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Three Pods in the `dev` Namespace, three Pods in the `test` Namespace. Bound to ReplicaSet with same name. 4 should be evicted.",
|
||||
maxPodsToEvict: 5,
|
||||
pods: []v1.Pod{*p1, *p2, *p3, *p8, *p9, *p10},
|
||||
expectedEvictedPodCount: 4,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Pods are: part of DaemonSet, with local storage, mirror pod annotation, critical pod annotation - none should be evicted.",
|
||||
pods: []*v1.Pod{p4, p5, p6, p7},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
maxPodsToEvict: 2,
|
||||
pods: []v1.Pod{*p4, *p5, *p6, *p7},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Test all Pods: 4 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3, p4, p5, p6, p7, p8, p9, p10},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
expectedEvictedPodCount: 2,
|
||||
maxPodsToEvict: 5,
|
||||
pods: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8, *p9, *p10},
|
||||
expectedEvictedPodCount: 4,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Pods with the same owner but different images should not be evicted",
|
||||
pods: []*v1.Pod{p11, p12},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
maxPodsToEvict: 5,
|
||||
pods: []v1.Pod{*p11, *p12},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Pods with multiple containers should not match themselves",
|
||||
pods: []*v1.Pod{p13},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
maxPodsToEvict: 5,
|
||||
pods: []v1.Pod{*p13},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Pods with matching ownerrefs but not all matching images should not trigger an eviction",
|
||||
pods: []*v1.Pod{p11, p13},
|
||||
nodes: []*v1.Node{node1, node2},
|
||||
maxPodsToEvict: 5,
|
||||
pods: []v1.Pod{*p11, *p13},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. Only node available has a taint, and nodeFit set to true. 0 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3},
|
||||
nodes: []*v1.Node{node1, node3},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet, all with a nodeSelector. Only node available has an incorrect node label, and nodeFit set to true. 0 should be evicted.",
|
||||
pods: []*v1.Pod{p15, p16, p17},
|
||||
nodes: []*v1.Node{node1, node4},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available is not schedulable, and nodeFit set to true. 0 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3},
|
||||
nodes: []*v1.Node{node1, node5},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available does not have enough CPU, and nodeFit set to true. 0 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3, p19},
|
||||
nodes: []*v1.Node{node1, node6},
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
|
||||
},
|
||||
{
|
||||
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available has enough CPU, and nodeFit set to true. 1 should be evicted.",
|
||||
pods: []*v1.Pod{p1, p2, p3, p20},
|
||||
nodes: []*v1.Node{node1, node6},
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range testCase.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range testCase.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
"v1",
|
||||
false,
|
||||
nil,
|
||||
nil,
|
||||
testCase.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
|
||||
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)
|
||||
podsEvicted := podEvictor.TotalEvicted()
|
||||
if podsEvicted != testCase.expectedEvictedPodCount {
|
||||
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
|
||||
}
|
||||
fakeClient := &fake.Clientset{}
|
||||
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
return true, &v1.PodList{Items: testCase.pods}, nil
|
||||
})
|
||||
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
|
||||
return true, node, nil
|
||||
})
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
"v1",
|
||||
false,
|
||||
testCase.maxPodsToEvict,
|
||||
[]*v1.Node{node},
|
||||
)
|
||||
|
||||
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node}, false, podEvictor)
|
||||
podsEvicted := podEvictor.TotalEvicted()
|
||||
if podsEvicted != testCase.expectedEvictedPodCount {
|
||||
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestRemoveDuplicatesUniformly(t *testing.T) {
|
||||
setRSOwnerRef2 := func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
|
||||
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-2"},
|
||||
}
|
||||
}
|
||||
setTwoRSOwnerRef := func(pod *v1.Pod) {
|
||||
pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
|
||||
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-1"},
|
||||
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-2"},
|
||||
}
|
||||
}
|
||||
|
||||
setTolerationsK1 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: "k1",
|
||||
Value: "v1",
|
||||
Operator: v1.TolerationOpEqual,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}
|
||||
setTolerationsK2 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: "k2",
|
||||
Value: "v2",
|
||||
Operator: v1.TolerationOpEqual,
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setMasterNoScheduleTaint := func(node *v1.Node) {
|
||||
node.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
Key: "node-role.kubernetes.io/control-plane",
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setMasterNoScheduleLabel := func(node *v1.Node) {
|
||||
if node.ObjectMeta.Labels == nil {
|
||||
node.ObjectMeta.Labels = map[string]string{}
|
||||
}
|
||||
node.ObjectMeta.Labels["node-role.kubernetes.io/control-plane"] = ""
|
||||
}
|
||||
|
||||
setWorkerLabel := func(node *v1.Node) {
|
||||
if node.ObjectMeta.Labels == nil {
|
||||
node.ObjectMeta.Labels = map[string]string{}
|
||||
}
|
||||
node.ObjectMeta.Labels["node-role.kubernetes.io/worker"] = "k1"
|
||||
node.ObjectMeta.Labels["node-role.kubernetes.io/worker"] = "k2"
|
||||
}
|
||||
|
||||
setNotMasterNodeSelectorK1 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.Affinity = &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: "node-role.kubernetes.io/control-plane",
|
||||
Operator: v1.NodeSelectorOpDoesNotExist,
|
||||
},
|
||||
{
|
||||
Key: "k1",
|
||||
Operator: v1.NodeSelectorOpDoesNotExist,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setNotMasterNodeSelectorK2 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.Affinity = &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
|
||||
NodeSelectorTerms: []v1.NodeSelectorTerm{
|
||||
{
|
||||
MatchExpressions: []v1.NodeSelectorRequirement{
|
||||
{
|
||||
Key: "node-role.kubernetes.io/control-plane",
|
||||
Operator: v1.NodeSelectorOpDoesNotExist,
|
||||
},
|
||||
{
|
||||
Key: "k2",
|
||||
Operator: v1.NodeSelectorOpDoesNotExist,
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
setWorkerLabelSelectorK1 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
if pod.Spec.NodeSelector == nil {
|
||||
pod.Spec.NodeSelector = map[string]string{}
|
||||
}
|
||||
pod.Spec.NodeSelector["node-role.kubernetes.io/worker"] = "k1"
|
||||
}
|
||||
|
||||
setWorkerLabelSelectorK2 := func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
if pod.Spec.NodeSelector == nil {
|
||||
pod.Spec.NodeSelector = map[string]string{}
|
||||
}
|
||||
pod.Spec.NodeSelector["node-role.kubernetes.io/worker"] = "k2"
|
||||
}
|
||||
|
||||
testCases := []struct {
|
||||
description string
|
||||
pods []*v1.Pod
|
||||
nodes []*v1.Node
|
||||
expectedEvictedPodCount uint
|
||||
strategy api.DeschedulerStrategy
|
||||
}{
|
||||
{
|
||||
description: "Evict pods uniformly",
|
||||
pods: []*v1.Pod{
|
||||
// (5,3,1) -> (3,3,3) -> 2 evictions
|
||||
test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p7", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p8", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p9", 100, 0, "n3", test.SetRSOwnerRef),
|
||||
},
|
||||
expectedEvictedPodCount: 2,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly with one node left out",
|
||||
pods: []*v1.Pod{
|
||||
// (5,3,1) -> (4,4,1) -> 1 eviction
|
||||
test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p7", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p8", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p9", 100, 0, "n3", test.SetRSOwnerRef),
|
||||
},
|
||||
expectedEvictedPodCount: 1,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly with two replica sets",
|
||||
pods: []*v1.Pod{
|
||||
// (5,3,1) -> (3,3,3) -> 2 evictions
|
||||
test.BuildTestPod("p11", 100, 0, "n1", setTwoRSOwnerRef),
|
||||
test.BuildTestPod("p12", 100, 0, "n1", setTwoRSOwnerRef),
|
||||
test.BuildTestPod("p13", 100, 0, "n1", setTwoRSOwnerRef),
|
||||
test.BuildTestPod("p14", 100, 0, "n1", setTwoRSOwnerRef),
|
||||
test.BuildTestPod("p15", 100, 0, "n1", setTwoRSOwnerRef),
|
||||
test.BuildTestPod("p16", 100, 0, "n2", setTwoRSOwnerRef),
|
||||
test.BuildTestPod("p17", 100, 0, "n2", setTwoRSOwnerRef),
|
||||
test.BuildTestPod("p18", 100, 0, "n2", setTwoRSOwnerRef),
|
||||
test.BuildTestPod("p19", 100, 0, "n3", setTwoRSOwnerRef),
|
||||
},
|
||||
expectedEvictedPodCount: 4,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly with two owner references",
|
||||
pods: []*v1.Pod{
|
||||
// (5,3,1) -> (3,3,3) -> 2 evictions
|
||||
test.BuildTestPod("p11", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p12", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p13", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p14", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p15", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p16", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p17", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p18", 100, 0, "n2", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p19", 100, 0, "n3", test.SetRSOwnerRef),
|
||||
// (1,3,5) -> (3,3,3) -> 2 evictions
|
||||
test.BuildTestPod("p21", 100, 0, "n1", setRSOwnerRef2),
|
||||
test.BuildTestPod("p22", 100, 0, "n2", setRSOwnerRef2),
|
||||
test.BuildTestPod("p23", 100, 0, "n2", setRSOwnerRef2),
|
||||
test.BuildTestPod("p24", 100, 0, "n2", setRSOwnerRef2),
|
||||
test.BuildTestPod("p25", 100, 0, "n3", setRSOwnerRef2),
|
||||
test.BuildTestPod("p26", 100, 0, "n3", setRSOwnerRef2),
|
||||
test.BuildTestPod("p27", 100, 0, "n3", setRSOwnerRef2),
|
||||
test.BuildTestPod("p28", 100, 0, "n3", setRSOwnerRef2),
|
||||
test.BuildTestPod("p29", 100, 0, "n3", setRSOwnerRef2),
|
||||
},
|
||||
expectedEvictedPodCount: 4,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods with number of pods less than nodes",
|
||||
pods: []*v1.Pod{
|
||||
// (2,0,0) -> (1,1,0) -> 1 eviction
|
||||
test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
},
|
||||
expectedEvictedPodCount: 1,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods with number of pods less than nodes, but ignore different pods with the same ownerref",
|
||||
pods: []*v1.Pod{
|
||||
// (1, 0, 0) for "bar","baz" images -> no eviction, even with a matching ownerKey
|
||||
// (2, 0, 0) for "foo" image -> (1,1,0) - 1 eviction
|
||||
// In this case the only "real" duplicates are p1 and p4, so one of those should be evicted
|
||||
buildTestPodWithImage("p1", "n1", "foo"),
|
||||
buildTestPodWithImage("p2", "n1", "bar"),
|
||||
buildTestPodWithImage("p3", "n1", "baz"),
|
||||
buildTestPodWithImage("p4", "n1", "foo"),
|
||||
},
|
||||
expectedEvictedPodCount: 1,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "A single pod with three nodes should not trigger an eviction",
pods: []*v1.Pod{
// (1,0,0) -> nothing to rebalance -> 0 evictions
|
||||
test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
|
||||
},
|
||||
expectedEvictedPodCount: 0,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("n1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("n3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly respecting taints",
|
||||
pods: []*v1.Pod{
|
||||
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
|
||||
test.BuildTestPod("p1", 100, 0, "worker1", setTolerationsK1),
|
||||
test.BuildTestPod("p2", 100, 0, "worker1", setTolerationsK2),
|
||||
test.BuildTestPod("p3", 100, 0, "worker1", setTolerationsK1),
|
||||
test.BuildTestPod("p4", 100, 0, "worker1", setTolerationsK2),
|
||||
test.BuildTestPod("p5", 100, 0, "worker1", setTolerationsK1),
|
||||
test.BuildTestPod("p6", 100, 0, "worker2", setTolerationsK2),
|
||||
test.BuildTestPod("p7", 100, 0, "worker2", setTolerationsK1),
|
||||
test.BuildTestPod("p8", 100, 0, "worker2", setTolerationsK2),
|
||||
test.BuildTestPod("p9", 100, 0, "worker3", setTolerationsK1),
|
||||
},
|
||||
expectedEvictedPodCount: 2,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master1", 2000, 3000, 10, setMasterNoScheduleTaint),
|
||||
test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleTaint),
|
||||
test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleTaint),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly respecting RequiredDuringSchedulingIgnoredDuringExecution node affinity",
|
||||
pods: []*v1.Pod{
|
||||
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
|
||||
test.BuildTestPod("p1", 100, 0, "worker1", setNotMasterNodeSelectorK1),
|
||||
test.BuildTestPod("p2", 100, 0, "worker1", setNotMasterNodeSelectorK2),
|
||||
test.BuildTestPod("p3", 100, 0, "worker1", setNotMasterNodeSelectorK1),
|
||||
test.BuildTestPod("p4", 100, 0, "worker1", setNotMasterNodeSelectorK2),
|
||||
test.BuildTestPod("p5", 100, 0, "worker1", setNotMasterNodeSelectorK1),
|
||||
test.BuildTestPod("p6", 100, 0, "worker2", setNotMasterNodeSelectorK2),
|
||||
test.BuildTestPod("p7", 100, 0, "worker2", setNotMasterNodeSelectorK1),
|
||||
test.BuildTestPod("p8", 100, 0, "worker2", setNotMasterNodeSelectorK2),
|
||||
test.BuildTestPod("p9", 100, 0, "worker3", setNotMasterNodeSelectorK1),
|
||||
},
|
||||
expectedEvictedPodCount: 2,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master1", 2000, 3000, 10, setMasterNoScheduleLabel),
|
||||
test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleLabel),
|
||||
test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleLabel),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly respecting node selector",
|
||||
pods: []*v1.Pod{
|
||||
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
|
||||
test.BuildTestPod("p1", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
test.BuildTestPod("p2", 100, 0, "worker1", setWorkerLabelSelectorK2),
|
||||
test.BuildTestPod("p3", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
test.BuildTestPod("p4", 100, 0, "worker1", setWorkerLabelSelectorK2),
|
||||
test.BuildTestPod("p5", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
test.BuildTestPod("p6", 100, 0, "worker2", setWorkerLabelSelectorK2),
|
||||
test.BuildTestPod("p7", 100, 0, "worker2", setWorkerLabelSelectorK1),
|
||||
test.BuildTestPod("p8", 100, 0, "worker2", setWorkerLabelSelectorK2),
|
||||
test.BuildTestPod("p9", 100, 0, "worker3", setWorkerLabelSelectorK1),
|
||||
},
|
||||
expectedEvictedPodCount: 2,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("worker1", 2000, 3000, 10, setWorkerLabel),
|
||||
test.BuildTestNode("worker2", 2000, 3000, 10, setWorkerLabel),
|
||||
test.BuildTestNode("worker3", 2000, 3000, 10, setWorkerLabel),
|
||||
test.BuildTestNode("master1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
{
|
||||
description: "Evict pods uniformly respecting node selector with zero target nodes",
|
||||
pods: []*v1.Pod{
|
||||
// no node carries the required worker label, so there are zero target nodes -> 0 evictions
|
||||
test.BuildTestPod("p1", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
test.BuildTestPod("p2", 100, 0, "worker1", setWorkerLabelSelectorK2),
|
||||
test.BuildTestPod("p3", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
test.BuildTestPod("p4", 100, 0, "worker1", setWorkerLabelSelectorK2),
|
||||
test.BuildTestPod("p5", 100, 0, "worker1", setWorkerLabelSelectorK1),
|
||||
test.BuildTestPod("p6", 100, 0, "worker2", setWorkerLabelSelectorK2),
|
||||
test.BuildTestPod("p7", 100, 0, "worker2", setWorkerLabelSelectorK1),
|
||||
test.BuildTestPod("p8", 100, 0, "worker2", setWorkerLabelSelectorK2),
|
||||
test.BuildTestPod("p9", 100, 0, "worker3", setWorkerLabelSelectorK1),
|
||||
},
|
||||
expectedEvictedPodCount: 0,
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master2", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("master3", 2000, 3000, 10, nil),
|
||||
},
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range testCase.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range testCase.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
policyv1.SchemeGroupVersion.String(),
|
||||
false,
|
||||
nil,
|
||||
nil,
|
||||
testCase.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
|
||||
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)
|
||||
podsEvicted := podEvictor.TotalEvicted()
|
||||
if podsEvicted != testCase.expectedEvictedPodCount {
|
||||
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,179 +0,0 @@
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
utilerrors "k8s.io/apimachinery/pkg/util/errors"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation"
|
||||
)
|
||||
|
||||
// validatedFailedPodsStrategyParams contains validated strategy parameters
|
||||
type validatedFailedPodsStrategyParams struct {
|
||||
validation.ValidatedStrategyParams
|
||||
includingInitContainers bool
|
||||
reasons sets.String
|
||||
excludeOwnerKinds sets.String
|
||||
minPodLifetimeSeconds *uint
|
||||
}
|
||||
|
||||
// RemoveFailedPods evicts pods that are in the Failed status phase.
|
||||
func RemoveFailedPods(
|
||||
ctx context.Context,
|
||||
client clientset.Interface,
|
||||
strategy api.DeschedulerStrategy,
|
||||
nodes []*v1.Node,
|
||||
podEvictor *evictions.PodEvictor,
|
||||
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
|
||||
) {
|
||||
strategyParams, err := validateAndParseRemoveFailedPodsParams(ctx, client, strategy.Params)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Invalid RemoveFailedPods parameters")
|
||||
return
|
||||
}
|
||||
|
||||
evictable := podEvictor.Evictable(
|
||||
evictions.WithPriorityThreshold(strategyParams.ThresholdPriority),
|
||||
evictions.WithNodeFit(strategyParams.NodeFit),
|
||||
evictions.WithLabelSelector(strategyParams.LabelSelector),
|
||||
)
|
||||
|
||||
var labelSelector *metav1.LabelSelector
|
||||
if strategy.Params != nil {
|
||||
labelSelector = strategy.Params.LabelSelector
|
||||
}
|
||||
|
||||
podFilter, err := podutil.NewOptions().
|
||||
WithFilter(evictable.IsEvictable).
|
||||
WithNamespaces(strategyParams.IncludedNamespaces).
|
||||
WithoutNamespaces(strategyParams.ExcludedNamespaces).
|
||||
WithLabelSelector(labelSelector).
|
||||
BuildFilterFunc()
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Error initializing pod filter function")
|
||||
return
|
||||
}
|
||||
// Only list failed pods
|
||||
phaseFilter := func(pod *v1.Pod) bool { return pod.Status.Phase == v1.PodFailed }
|
||||
podFilter = podutil.WrapFilterFuncs(phaseFilter, podFilter)
|
||||
|
||||
for _, node := range nodes {
|
||||
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
|
||||
pods, err := podutil.ListAllPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Error listing a nodes failed pods", "node", klog.KObj(node))
|
||||
continue
|
||||
}
|
||||
|
||||
for i, pod := range pods {
|
||||
if err = validateFailedPodShouldEvict(pod, *strategyParams); err != nil {
|
||||
klog.V(4).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(pod))
|
||||
continue
|
||||
}
|
||||
|
||||
if _, err = podEvictor.EvictPod(ctx, pods[i], node, "FailedPod"); err != nil {
|
||||
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
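// validateAndParseRemoveFailedPodsParams validates the common strategy parameters and
// extracts the FailedPods specific fields (reasons, init containers, excluded owner kinds,
// minimum pod lifetime) into a validatedFailedPodsStrategyParams.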
func validateAndParseRemoveFailedPodsParams(
|
||||
ctx context.Context,
|
||||
client clientset.Interface,
|
||||
params *api.StrategyParameters,
|
||||
) (*validatedFailedPodsStrategyParams, error) {
|
||||
if params == nil {
|
||||
return &validatedFailedPodsStrategyParams{
|
||||
ValidatedStrategyParams: validation.DefaultValidatedStrategyParams(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, params)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
var reasons, excludeOwnerKinds sets.String
|
||||
var includingInitContainers bool
|
||||
var minPodLifetimeSeconds *uint
|
||||
if params.FailedPods != nil {
|
||||
reasons = sets.NewString(params.FailedPods.Reasons...)
|
||||
includingInitContainers = params.FailedPods.IncludingInitContainers
|
||||
excludeOwnerKinds = sets.NewString(params.FailedPods.ExcludeOwnerKinds...)
|
||||
minPodLifetimeSeconds = params.FailedPods.MinPodLifetimeSeconds
|
||||
}
|
||||
|
||||
return &validatedFailedPodsStrategyParams{
|
||||
ValidatedStrategyParams: *strategyParams,
|
||||
includingInitContainers: includingInitContainers,
|
||||
reasons: reasons,
|
||||
excludeOwnerKinds: excludeOwnerKinds,
|
||||
minPodLifetimeSeconds: minPodLifetimeSeconds,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// validateFailedPodShouldEvict checks the FailedPods strategy parameters to decide whether
// the failed Pod should be evicted, returning an aggregated error with the reasons it should not be.
|
||||
func validateFailedPodShouldEvict(pod *v1.Pod, strategyParams validatedFailedPodsStrategyParams) error {
|
||||
var errs []error
|
||||
|
||||
if strategyParams.minPodLifetimeSeconds != nil {
|
||||
podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds())
|
||||
if podAgeSeconds < *strategyParams.minPodLifetimeSeconds {
|
||||
errs = append(errs, fmt.Errorf("pod does not exceed the min age seconds of %d", *strategyParams.minPodLifetimeSeconds))
|
||||
}
|
||||
}
|
||||
|
||||
if len(strategyParams.excludeOwnerKinds) > 0 {
|
||||
ownerRefList := podutil.OwnerRef(pod)
|
||||
for _, owner := range ownerRefList {
|
||||
if strategyParams.excludeOwnerKinds.Has(owner.Kind) {
|
||||
errs = append(errs, fmt.Errorf("pod's owner kind of %s is excluded", owner.Kind))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if len(strategyParams.reasons) > 0 {
|
||||
reasons := getFailedContainerStatusReasons(pod.Status.ContainerStatuses)
|
||||
|
||||
if pod.Status.Phase == v1.PodFailed && pod.Status.Reason != "" {
|
||||
reasons = append(reasons, pod.Status.Reason)
|
||||
}
|
||||
|
||||
if strategyParams.includingInitContainers {
|
||||
reasons = append(reasons, getFailedContainerStatusReasons(pod.Status.InitContainerStatuses)...)
|
||||
}
|
||||
|
||||
if !strategyParams.reasons.HasAny(reasons...) {
|
||||
errs = append(errs, fmt.Errorf("pod does not match any of the reasons"))
|
||||
}
|
||||
}
|
||||
|
||||
return utilerrors.NewAggregate(errs)
|
||||
}
|
||||
|
||||
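// getFailedContainerStatusReasons collects the Waiting and Terminated reasons from the
// given container statuses so they can be matched against the configured reasons.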
func getFailedContainerStatusReasons(containerStatuses []v1.ContainerStatus) []string {
|
||||
reasons := make([]string, 0)
|
||||
|
||||
for _, containerStatus := range containerStatuses {
|
||||
if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason != "" {
|
||||
reasons = append(reasons, containerStatus.State.Waiting.Reason)
|
||||
}
|
||||
if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.Reason != "" {
|
||||
reasons = append(reasons, containerStatus.State.Terminated.Reason)
|
||||
}
|
||||
}
|
||||
|
||||
return reasons
|
||||
}
|
||||
@@ -1,358 +0,0 @@
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
var (
|
||||
OneHourInSeconds uint = 3600
|
||||
)
|
||||
|
||||
func TestRemoveFailedPods(t *testing.T) {
|
||||
createStrategy := func(enabled, includingInitContainers bool, reasons, excludeKinds []string, minAgeSeconds *uint, nodeFit bool) api.DeschedulerStrategy {
|
||||
return api.DeschedulerStrategy{
|
||||
Enabled: enabled,
|
||||
Params: &api.StrategyParameters{
|
||||
FailedPods: &api.FailedPods{
|
||||
Reasons: reasons,
|
||||
IncludingInitContainers: includingInitContainers,
|
||||
ExcludeOwnerKinds: excludeKinds,
|
||||
MinPodLifetimeSeconds: minAgeSeconds,
|
||||
},
|
||||
NodeFit: nodeFit,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
description string
|
||||
nodes []*v1.Node
|
||||
strategy api.DeschedulerStrategy
|
||||
expectedEvictedPodCount uint
|
||||
pods []*v1.Pod
|
||||
}{
|
||||
{
|
||||
description: "default empty strategy, 0 failures, 0 evictions",
|
||||
strategy: api.DeschedulerStrategy{},
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{}, // no pods come back with field selector phase=Failed
|
||||
},
|
||||
{
|
||||
description: "0 failures, 0 evictions",
|
||||
strategy: createStrategy(true, false, nil, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{}, // no pods come back with field selector phase=Failed
|
||||
},
|
||||
{
|
||||
description: "1 container terminated with reason NodeAffinity, 1 eviction",
|
||||
strategy: createStrategy(true, false, nil, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 1,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "1 init container terminated with reason NodeAffinity, 1 eviction",
|
||||
strategy: createStrategy(true, true, nil, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 1,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
|
||||
}, nil), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "1 init container waiting with reason CreateContainerConfigError, 1 eviction",
|
||||
strategy: createStrategy(true, true, nil, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 1,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"},
|
||||
}, nil), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "2 init container waiting with reason CreateContainerConfigError, 2 nodes, 2 evictions",
|
||||
strategy: createStrategy(true, true, nil, nil, nil, false),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("node2", 2000, 3000, 10, nil),
|
||||
},
|
||||
expectedEvictedPodCount: 2,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
|
||||
}, nil), nil),
|
||||
buildTestPod("p2", "node2", newPodStatus("", "", &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
|
||||
}, nil), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "include reason=CreateContainerConfigError, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
|
||||
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 1,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "include reason=CreateContainerConfigError+NodeAffinity, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
|
||||
strategy: createStrategy(true, false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 1,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "include reason=CreateContainerConfigError, 1 container terminated with reason NodeAffinity, 0 eviction",
|
||||
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "include init container=false, 1 init container waiting with reason CreateContainerConfigError, 0 eviction",
|
||||
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
|
||||
Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"},
|
||||
}, nil), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "lifetime 1 hour, 1 container terminated with reason NodeAffinity, 0 eviction",
|
||||
strategy: createStrategy(true, false, nil, nil, &OneHourInSeconds, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
|
||||
strategy: createStrategy(true, false, nil, nil, nil, true),
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode("node1", 2000, 3000, 10, nil),
|
||||
test.BuildTestNode("node2", 2000, 2000, 10, func(node *v1.Node) {
|
||||
node.Spec.Unschedulable = true
|
||||
}),
|
||||
},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "nodeFit=true, only available node does not have enough resources, 1 container terminated with reason CreateContainerConfigError, 0 eviction",
|
||||
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, true),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 1, 1, 10, nil), test.BuildTestNode("node2", 0, 0, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
|
||||
}), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
|
||||
strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
|
||||
}, nil), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 eviction",
|
||||
strategy: createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 1,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
|
||||
}, nil), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 pod in termination; nothing should be moved",
|
||||
strategy: createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
|
||||
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
|
||||
}, nil), &metav1.Time{}),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "1 container terminated with reason ShutDown, 0 evictions",
|
||||
strategy: createStrategy(true, false, nil, nil, nil, true),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("Shutdown", v1.PodFailed, nil, nil), nil),
|
||||
},
|
||||
},
|
||||
{
|
||||
description: "include reason=Shutdown, 2 containers terminated with reason ShutDown, 2 evictions",
|
||||
strategy: createStrategy(true, false, []string{"Shutdown"}, nil, nil, false),
|
||||
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
|
||||
expectedEvictedPodCount: 2,
|
||||
pods: []*v1.Pod{
|
||||
buildTestPod("p1", "node1", newPodStatus("Shutdown", v1.PodFailed, nil, nil), nil),
|
||||
buildTestPod("p2", "node1", newPodStatus("Shutdown", v1.PodFailed, nil, nil), nil),
|
||||
},
|
||||
},
|
||||
}
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range tc.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range tc.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
policyv1.SchemeGroupVersion.String(),
|
||||
false,
|
||||
nil,
|
||||
nil,
|
||||
tc.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
|
||||
RemoveFailedPods(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)
|
||||
actualEvictedPodCount := podEvictor.TotalEvicted()
|
||||
if actualEvictedPodCount != tc.expectedEvictedPodCount {
|
||||
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidRemoveFailedPodsParams(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
fakeClient := &fake.Clientset{}
|
||||
testCases := []struct {
|
||||
name string
|
||||
params *api.StrategyParameters
|
||||
}{
|
||||
{name: "validate nil params", params: nil},
|
||||
{name: "validate empty params", params: &api.StrategyParameters{}},
|
||||
{name: "validate reasons params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
|
||||
Reasons: []string{"CreateContainerConfigError"},
|
||||
}}},
|
||||
{name: "validate includingInitContainers params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
|
||||
IncludingInitContainers: true,
|
||||
}}},
|
||||
{name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
|
||||
ExcludeOwnerKinds: []string{"Job"},
|
||||
}}},
|
||||
{name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
|
||||
MinPodLifetimeSeconds: &OneHourInSeconds,
|
||||
}}},
|
||||
}
|
||||
for _, tc := range testCases {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
t.Parallel()
|
||||
params, err := validateAndParseRemoveFailedPodsParams(ctx, fakeClient, tc.params)
|
||||
if err != nil {
|
||||
t.Errorf("strategy params should be valid but got err: %v", err.Error())
|
||||
}
|
||||
if params == nil {
|
||||
t.Errorf("strategy params should return a ValidatedFailedPodsStrategyParams but got nil")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
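// newPodStatus builds a PodStatus with the given reason and phase; when an init container
// or container state is supplied, the phase is forced to PodFailed so the pod qualifies
// for the RemoveFailedPods strategy.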
func newPodStatus(reason string, phase v1.PodPhase, initContainerState, containerState *v1.ContainerState) v1.PodStatus {
|
||||
ps := v1.PodStatus{
|
||||
Reason: reason,
|
||||
Phase: phase,
|
||||
}
|
||||
|
||||
if initContainerState != nil {
|
||||
ps.InitContainerStatuses = []v1.ContainerStatus{{State: *initContainerState}}
|
||||
ps.Phase = v1.PodFailed
|
||||
}
|
||||
|
||||
if containerState != nil {
|
||||
ps.ContainerStatuses = []v1.ContainerStatus{{State: *containerState}}
|
||||
ps.Phase = v1.PodFailed
|
||||
}
|
||||
|
||||
return ps
|
||||
}
|
||||
|
||||
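// buildTestPod creates a ReplicaSet-owned test pod with the given status and an optional
// deletion timestamp to simulate a pod that is already terminating.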
func buildTestPod(podName, nodeName string, podStatus v1.PodStatus, deletionTimestamp *metav1.Time) *v1.Pod {
|
||||
pod := test.BuildTestPod(podName, 1, 1, nodeName, func(p *v1.Pod) {
|
||||
p.Status = podStatus
|
||||
})
|
||||
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
|
||||
pod.ObjectMeta.SetCreationTimestamp(metav1.Now())
|
||||
pod.DeletionTimestamp = deletionTimestamp
|
||||
return pod
|
||||
}
|
||||
435
pkg/descheduler/strategies/lownodeutilization.go
Normal file
@@ -0,0 +1,435 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sort"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
)
|
||||
|
||||
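// NodeUsageMap couples a node with its current resource utilization and the pods running on it.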
type NodeUsageMap struct {
|
||||
node *v1.Node
|
||||
usage api.ResourceThresholds
|
||||
allPods []*v1.Pod
|
||||
}
|
||||
|
||||
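// NodePodsMap maps each node to the pods assigned to it.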
type NodePodsMap map[*v1.Node][]*v1.Pod
|
||||
|
||||
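// LowNodeUtilization classifies nodes as under- or over-utilized using the configured
// thresholds and targetThresholds, and evicts pods from the over-utilized nodes so that
// the under-utilized ones can be filled up by the scheduler.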
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
|
||||
if !strategy.Enabled {
|
||||
return
|
||||
}
|
||||
// TODO: move to config validation?
// TODO: Maybe create a struct for the strategy as well, so that we don't have to pass along all the params?
|
||||
if strategy.Params.NodeResourceUtilizationThresholds == nil {
|
||||
klog.V(1).Infof("NodeResourceUtilizationThresholds not set")
|
||||
return
|
||||
}
|
||||
|
||||
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
|
||||
if !validateThresholds(thresholds) {
|
||||
return
|
||||
}
|
||||
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
|
||||
if !validateTargetThresholds(targetThresholds) {
|
||||
return
|
||||
}
|
||||
|
||||
npm := createNodePodsMap(ctx, client, nodes)
|
||||
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds, evictLocalStoragePods)
|
||||
|
||||
klog.V(1).Infof("Criteria for a node under utilization: CPU: %v, Mem: %v, Pods: %v",
|
||||
thresholds[v1.ResourceCPU], thresholds[v1.ResourceMemory], thresholds[v1.ResourcePods])
|
||||
|
||||
if len(lowNodes) == 0 {
|
||||
klog.V(1).Infof("No node is underutilized, nothing to do here, you might tune your thresholds further")
|
||||
return
|
||||
}
|
||||
klog.V(1).Infof("Total number of underutilized nodes: %v", len(lowNodes))
|
||||
|
||||
if len(lowNodes) < strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
|
||||
klog.V(1).Infof("number of nodes underutilized (%v) is less than NumberOfNodes (%v), nothing to do here", len(lowNodes), strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
|
||||
return
|
||||
}
|
||||
|
||||
if len(lowNodes) == len(nodes) {
|
||||
klog.V(1).Infof("all nodes are underutilized, nothing to do here")
|
||||
return
|
||||
}
|
||||
|
||||
if len(targetNodes) == 0 {
|
||||
klog.V(1).Infof("all nodes are under target utilization, nothing to do here")
|
||||
return
|
||||
}
|
||||
|
||||
klog.V(1).Infof("Criteria for a node above target utilization: CPU: %v, Mem: %v, Pods: %v",
|
||||
targetThresholds[v1.ResourceCPU], targetThresholds[v1.ResourceMemory], targetThresholds[v1.ResourcePods])
|
||||
klog.V(1).Infof("Total number of nodes above target utilization: %v", len(targetNodes))
|
||||
|
||||
evictPodsFromTargetNodes(
|
||||
ctx,
|
||||
targetNodes,
|
||||
lowNodes,
|
||||
targetThresholds,
|
||||
evictLocalStoragePods,
|
||||
podEvictor)
|
||||
|
||||
klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
|
||||
}
|
||||
|
||||
func validateThresholds(thresholds api.ResourceThresholds) bool {
|
||||
if thresholds == nil || len(thresholds) == 0 {
|
||||
klog.V(1).Infof("no resource threshold is configured")
|
||||
return false
|
||||
}
|
||||
for name := range thresholds {
|
||||
switch name {
|
||||
case v1.ResourceCPU:
|
||||
continue
|
||||
case v1.ResourceMemory:
|
||||
continue
|
||||
case v1.ResourcePods:
|
||||
continue
|
||||
default:
|
||||
klog.Errorf("only cpu, memory, or pods thresholds can be specified")
|
||||
return false
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// This function could be merged into the one above once the validation requirements are settled.
|
||||
func validateTargetThresholds(targetThresholds api.ResourceThresholds) bool {
|
||||
if targetThresholds == nil {
|
||||
klog.V(1).Infof("no target resource threshold is configured")
|
||||
return false
|
||||
} else if _, ok := targetThresholds[v1.ResourcePods]; !ok {
|
||||
klog.V(1).Infof("no target resource threshold for pods is configured")
|
||||
return false
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
|
||||
// low and high thresholds, it is simply ignored.
|
||||
func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThresholds api.ResourceThresholds, evictLocalStoragePods bool) ([]NodeUsageMap, []NodeUsageMap) {
|
||||
lowNodes, targetNodes := []NodeUsageMap{}, []NodeUsageMap{}
|
||||
for node, pods := range npm {
|
||||
usage := nodeUtilization(node, pods, evictLocalStoragePods)
|
||||
nuMap := NodeUsageMap{
|
||||
node: node,
|
||||
usage: usage,
|
||||
allPods: pods,
|
||||
}
|
||||
// Check if node is underutilized and if we can schedule pods on it.
|
||||
if !nodeutil.IsNodeUnschedulable(node) && IsNodeWithLowUtilization(usage, thresholds) {
|
||||
klog.V(2).Infof("Node %#v is under utilized with usage: %#v", node.Name, usage)
|
||||
lowNodes = append(lowNodes, nuMap)
|
||||
} else if IsNodeAboveTargetUtilization(usage, targetThresholds) {
|
||||
klog.V(2).Infof("Node %#v is over utilized with usage: %#v", node.Name, usage)
|
||||
targetNodes = append(targetNodes, nuMap)
|
||||
} else {
|
||||
if nodeutil.IsNodeUnschedulable(node) {
|
||||
klog.V(2).Infof("Node %#v is unschedulable", node.Name)
|
||||
}
|
||||
klog.V(2).Infof("Node %#v is utilized with usage: %#v", node.Name, usage)
|
||||
}
|
||||
}
|
||||
return lowNodes, targetNodes
|
||||
}
|
||||
|
||||
// evictPodsFromTargetNodes evicts pods based on priority when all the pods on the node have a priority set;
// otherwise it falls back to evicting them based on QoS class.
|
||||
// TODO: @ravig Break this function into smaller functions.
|
||||
func evictPodsFromTargetNodes(
|
||||
ctx context.Context,
|
||||
targetNodes, lowNodes []NodeUsageMap,
|
||||
targetThresholds api.ResourceThresholds,
|
||||
evictLocalStoragePods bool,
|
||||
podEvictor *evictions.PodEvictor,
|
||||
) {
|
||||
|
||||
SortNodesByUsage(targetNodes)
|
||||
|
||||
// upper bound on total number of pods/cpu/memory to be moved
|
||||
var totalPods, totalCPU, totalMem float64
|
||||
var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
|
||||
for _, node := range lowNodes {
|
||||
taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
|
||||
nodeCapacity := node.node.Status.Capacity
|
||||
if len(node.node.Status.Allocatable) > 0 {
|
||||
nodeCapacity = node.node.Status.Allocatable
|
||||
}
|
||||
// totalPods to be moved
|
||||
podsPercentage := targetThresholds[v1.ResourcePods] - node.usage[v1.ResourcePods]
|
||||
totalPods += ((float64(podsPercentage) * float64(nodeCapacity.Pods().Value())) / 100)
|
||||
|
||||
// totalCPU capacity to be moved
|
||||
if _, ok := targetThresholds[v1.ResourceCPU]; ok {
|
||||
cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
|
||||
totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
|
||||
}
|
||||
|
||||
// totalMem capacity to be moved
|
||||
if _, ok := targetThresholds[v1.ResourceMemory]; ok {
|
||||
memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
|
||||
totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
|
||||
}
|
||||
}
|
||||
|
||||
klog.V(1).Infof("Total capacity to be moved: CPU:%v, Mem:%v, Pods:%v", totalCPU, totalMem, totalPods)
|
||||
klog.V(1).Infof("********Number of pods evicted from each node:***********")
|
||||
|
||||
for _, node := range targetNodes {
|
||||
nodeCapacity := node.node.Status.Capacity
|
||||
if len(node.node.Status.Allocatable) > 0 {
|
||||
nodeCapacity = node.node.Status.Allocatable
|
||||
}
|
||||
klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
|
||||
|
||||
nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods := classifyPods(node.allPods, evictLocalStoragePods)
|
||||
klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, bestEffortPods:%v, burstablePods:%v, guaranteedPods:%v", len(node.allPods), len(nonRemovablePods), len(bestEffortPods), len(burstablePods), len(guaranteedPods))
|
||||
|
||||
// Check if one pod has priority, if yes, assume that all pods have priority and evict pods based on priority.
|
||||
if node.allPods[0].Spec.Priority != nil {
|
||||
klog.V(1).Infof("All pods have priority associated with them. Evicting pods based on priority")
|
||||
evictablePods := make([]*v1.Pod, 0)
|
||||
evictablePods = append(append(burstablePods, bestEffortPods...), guaranteedPods...)
|
||||
|
||||
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
|
||||
sortPodsBasedOnPriority(evictablePods)
|
||||
evictPods(ctx, evictablePods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
|
||||
} else {
|
||||
// TODO: Remove this when we support only priority.
|
||||
// Falling back to evicting pods based on QoS.
|
||||
klog.V(1).Infof("Evicting pods based on QoS")
|
||||
klog.V(1).Infof("There are %v non-evictable pods on the node", len(nonRemovablePods))
|
||||
// evict best effort pods
|
||||
evictPods(ctx, bestEffortPods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
|
||||
// evict burstable pods
|
||||
evictPods(ctx, burstablePods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
|
||||
// evict guaranteed pods
|
||||
evictPods(ctx, guaranteedPods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
|
||||
}
|
||||
klog.V(1).Infof("%v pods evicted from node %#v with usage %v", podEvictor.NodeEvicted(node.node), node.node.Name, node.usage)
|
||||
}
|
||||
}
|
||||
|
||||
func evictPods(
|
||||
ctx context.Context,
|
||||
inputPods []*v1.Pod,
|
||||
targetThresholds api.ResourceThresholds,
|
||||
nodeCapacity v1.ResourceList,
|
||||
nodeUsage api.ResourceThresholds,
|
||||
totalPods *float64,
|
||||
totalCPU *float64,
|
||||
totalMem *float64,
|
||||
taintsOfLowNodes map[string][]v1.Taint,
|
||||
podEvictor *evictions.PodEvictor,
|
||||
node *v1.Node) {
|
||||
if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
|
||||
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
|
||||
for _, pod := range inputPods {
|
||||
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
|
||||
klog.V(3).Infof("Skipping eviction for Pod: %#v, doesn't tolerate node taint", pod.Name)
|
||||
continue
|
||||
}
|
||||
|
||||
cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
|
||||
mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)
|
||||
|
||||
success, err := podEvictor.EvictPod(ctx, pod, node)
|
||||
if err != nil {
|
||||
klog.Errorf("Error evicting pod: (%#v)", err)
|
||||
break
|
||||
}
|
||||
|
||||
if success {
|
||||
klog.V(3).Infof("Evicted pod: %#v", pod.Name)
|
||||
// update remaining pods
|
||||
nodeUsage[v1.ResourcePods] -= onePodPercentage
|
||||
*totalPods--
|
||||
|
||||
// update remaining cpu
|
||||
*totalCPU -= float64(cUsage)
|
||||
nodeUsage[v1.ResourceCPU] -= api.Percentage((float64(cUsage) * 100) / float64(nodeCapacity.Cpu().MilliValue()))
|
||||
|
||||
// update remaining memory
|
||||
*totalMem -= float64(mUsage)
|
||||
nodeUsage[v1.ResourceMemory] -= api.Percentage(float64(mUsage) / float64(nodeCapacity.Memory().Value()) * 100)
|
||||
|
||||
klog.V(3).Infof("updated node usage: %#v", nodeUsage)
|
||||
// check if node utilization drops below target threshold or required capacity (cpu, memory, pods) is moved
|
||||
if !IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) || (*totalPods <= 0 && *totalCPU <= 0 && *totalMem <= 0) {
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
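// Illustrative accounting (assumed numbers, not part of the code path): evicting one pod that
// requests 200m CPU on a node with 4000m allocatable CPU decreases nodeUsage[v1.ResourceCPU]
// by (200*100)/4000 = 5 percentage points and totalCPU by 200; the loop stops as soon as the
// node drops below the target thresholds or all of the remaining totals reach zero.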
|
||||
|
||||
func SortNodesByUsage(nodes []NodeUsageMap) {
|
||||
sort.Slice(nodes, func(i, j int) bool {
|
||||
var ti, tj api.Percentage
|
||||
for name, value := range nodes[i].usage {
|
||||
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
|
||||
ti += value
|
||||
}
|
||||
}
|
||||
for name, value := range nodes[j].usage {
|
||||
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
|
||||
tj += value
|
||||
}
|
||||
}
|
||||
// To return sorted in descending order
|
||||
return ti > tj
|
||||
})
|
||||
}
|
||||
|
||||
// sortPodsBasedOnPriority sorts pods based on priority and if their priorities are equal, they are sorted based on QoS tiers.
|
||||
func sortPodsBasedOnPriority(evictablePods []*v1.Pod) {
|
||||
sort.Slice(evictablePods, func(i, j int) bool {
|
||||
if evictablePods[i].Spec.Priority == nil && evictablePods[j].Spec.Priority != nil {
|
||||
return true
|
||||
}
|
||||
if evictablePods[j].Spec.Priority == nil && evictablePods[i].Spec.Priority != nil {
|
||||
return false
|
||||
}
|
||||
if (evictablePods[j].Spec.Priority == nil && evictablePods[i].Spec.Priority == nil) || (*evictablePods[i].Spec.Priority == *evictablePods[j].Spec.Priority) {
|
||||
if podutil.IsBestEffortPod(evictablePods[i]) {
|
||||
return true
|
||||
}
|
||||
if podutil.IsBurstablePod(evictablePods[i]) && podutil.IsGuaranteedPod(evictablePods[j]) {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
return *evictablePods[i].Spec.Priority < *evictablePods[j].Spec.Priority
|
||||
})
|
||||
}
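// Illustrative ordering (assumed pods, not executed here): given A(priority=nil), B(priority=0),
// C(priority=10000, BestEffort) and D(priority=10000, Guaranteed), the comparator above sorts
// them as A, B, C, D: nil priorities first, then ascending priority, with QoS class
// (BestEffort before Burstable before Guaranteed) breaking ties.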
|
||||
|
||||
// createNodePodsMap returns nodepodsmap with evictable pods on node.
|
||||
func createNodePodsMap(ctx context.Context, client clientset.Interface, nodes []*v1.Node) NodePodsMap {
|
||||
npm := NodePodsMap{}
|
||||
for _, node := range nodes {
|
||||
pods, err := podutil.ListPodsOnANode(ctx, client, node)
|
||||
if err != nil {
|
||||
klog.Warningf("node %s will not be processed, error in accessing its pods (%#v)", node.Name, err)
|
||||
} else {
|
||||
npm[node] = pods
|
||||
}
|
||||
}
|
||||
return npm
|
||||
}
|
||||
|
||||
func IsNodeAboveTargetUtilization(nodeThresholds api.ResourceThresholds, thresholds api.ResourceThresholds) bool {
|
||||
for name, nodeValue := range nodeThresholds {
|
||||
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
|
||||
if value, ok := thresholds[name]; !ok {
|
||||
continue
|
||||
} else if nodeValue > value {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func IsNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds api.ResourceThresholds) bool {
|
||||
for name, nodeValue := range nodeThresholds {
|
||||
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
|
||||
if value, ok := thresholds[name]; !ok {
|
||||
continue
|
||||
} else if nodeValue > value {
|
||||
return false
|
||||
}
|
||||
}
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
func nodeUtilization(node *v1.Node, pods []*v1.Pod, evictLocalStoragePods bool) api.ResourceThresholds {
|
||||
totalReqs := map[v1.ResourceName]*resource.Quantity{
|
||||
v1.ResourceCPU: {},
|
||||
v1.ResourceMemory: {},
|
||||
}
|
||||
for _, pod := range pods {
|
||||
req, _ := utils.PodRequestsAndLimits(pod)
|
||||
for name, quantity := range req {
|
||||
if name == v1.ResourceCPU || name == v1.ResourceMemory {
|
||||
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
|
||||
// the format of the quantity will be updated to the format of y.
|
||||
totalReqs[name].Add(quantity)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nodeCapacity := node.Status.Capacity
|
||||
if len(node.Status.Allocatable) > 0 {
|
||||
nodeCapacity = node.Status.Allocatable
|
||||
}
|
||||
|
||||
totalPods := len(pods)
|
||||
return api.ResourceThresholds{
|
||||
v1.ResourceCPU: api.Percentage((float64(totalReqs[v1.ResourceCPU].MilliValue()) * 100) / float64(nodeCapacity.Cpu().MilliValue())),
|
||||
v1.ResourceMemory: api.Percentage(float64(totalReqs[v1.ResourceMemory].Value()) / float64(nodeCapacity.Memory().Value()) * 100),
|
||||
v1.ResourcePods: api.Percentage((float64(totalPods) * 100) / float64(nodeCapacity.Pods().Value())),
|
||||
}
|
||||
}
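// Worked example (assumed numbers): on a node with 4000m allocatable CPU whose pods request
// 1600m in total, the CPU entry returned above is api.Percentage((1600*100)/4000) = 40;
// memory and the pod count are converted to percentages of allocatable capacity the same way.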
|
||||
|
||||
func classifyPods(pods []*v1.Pod, evictLocalStoragePods bool) ([]*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) {
|
||||
var nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods []*v1.Pod
|
||||
|
||||
// From https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/
|
||||
//
|
||||
// For a Pod to be given a QoS class of Guaranteed:
|
||||
// - every Container in the Pod must have a memory limit and a memory request, and they must be the same.
|
||||
// - every Container in the Pod must have a CPU limit and a CPU request, and they must be the same.
|
||||
// A Pod is given a QoS class of Burstable if:
|
||||
// - the Pod does not meet the criteria for QoS class Guaranteed.
|
||||
// - at least one Container in the Pod has a memory or CPU request.
|
||||
// For a Pod to be given a QoS class of BestEffort, the Containers in the Pod must not have any memory or CPU limits or requests.
|
||||
|
||||
for _, pod := range pods {
|
||||
if !podutil.IsEvictable(pod, evictLocalStoragePods) {
|
||||
nonRemovablePods = append(nonRemovablePods, pod)
|
||||
continue
|
||||
}
|
||||
|
||||
switch utils.GetPodQOS(pod) {
|
||||
case v1.PodQOSGuaranteed:
|
||||
guaranteedPods = append(guaranteedPods, pod)
|
||||
case v1.PodQOSBurstable:
|
||||
burstablePods = append(burstablePods, pod)
|
||||
default: // alias v1.PodQOSBestEffort
|
||||
bestEffortPods = append(bestEffortPods, pod)
|
||||
}
|
||||
}
|
||||
|
||||
return nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods
|
||||
}
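For orientation, a minimal caller of this strategy, mirroring the wiring used by the tests below, could look like the following sketch; ctx, client, nodes and the concrete threshold values are assumptions for illustration, not part of this file.

// Sketch only: invoking LowNodeUtilization with illustrative thresholds.
strategy := api.DeschedulerStrategy{
	Enabled: true,
	Params: api.StrategyParameters{
		NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
			Thresholds:       api.ResourceThresholds{v1.ResourceCPU: 20, v1.ResourcePods: 20},
			TargetThresholds: api.ResourceThresholds{v1.ResourceCPU: 50, v1.ResourcePods: 50},
		},
	},
}
// "policy/v1" and the max-evictions value are placeholders; see the tests below for concrete values.
podEvictor := evictions.NewPodEvictor(client, "policy/v1", false, 10, nodes)
LowNodeUtilization(ctx, client, strategy, nodes, false, podEvictor)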
|
||||
642 pkg/descheduler/strategies/lownodeutilization_test.go Normal file
@@ -0,0 +1,642 @@
|
||||
/*
|
||||
Copyright 2017 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package strategies
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"reflect"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/api/policy/v1beta1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/fields"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/apimachinery/pkg/runtime/schema"
|
||||
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
|
||||
"k8s.io/apimachinery/pkg/watch"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
var (
|
||||
lowPriority = int32(0)
|
||||
highPriority = int32(10000)
|
||||
)
|
||||
|
||||
func setRSOwnerRef(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() }
|
||||
func setDSOwnerRef(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() }
|
||||
func setNormalOwnerRef(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() }
|
||||
func setHighPriority(pod *v1.Pod) { pod.Spec.Priority = &highPriority }
|
||||
func setLowPriority(pod *v1.Pod) { pod.Spec.Priority = &lowPriority }
|
||||
func setNodeUnschedulable(node *v1.Node) { node.Spec.Unschedulable = true }
|
||||
|
||||
func makeBestEffortPod(pod *v1.Pod) {
|
||||
pod.Spec.Containers[0].Resources.Requests = nil
pod.Spec.Containers[0].Resources.Limits = nil
|
||||
}
|
||||
|
||||
func makeBurstablePod(pod *v1.Pod) {
|
||||
pod.Spec.Containers[0].Resources.Limits = nil
|
||||
}
|
||||
|
||||
func makeGuaranteedPod(pod *v1.Pod) {
|
||||
pod.Spec.Containers[0].Resources.Limits[v1.ResourceCPU] = pod.Spec.Containers[0].Resources.Requests[v1.ResourceCPU]
|
||||
pod.Spec.Containers[0].Resources.Limits[v1.ResourceMemory] = pod.Spec.Containers[0].Resources.Requests[v1.ResourceMemory]
|
||||
}
|
||||
|
||||
func TestLowNodeUtilization(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
n1NodeName := "n1"
|
||||
n2NodeName := "n2"
|
||||
n3NodeName := "n3"
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
thresholds, targetThresholds api.ResourceThresholds
|
||||
nodes map[string]*v1.Node
|
||||
pods map[string]*v1.PodList
|
||||
expectedPodsEvicted int
|
||||
evictedPods []string
|
||||
}{
|
||||
{
|
||||
name: "without priorities",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
targetThresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 50,
|
||||
v1.ResourcePods: 50,
|
||||
},
|
||||
nodes: map[string]*v1.Node{
|
||||
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
|
||||
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
|
||||
},
|
||||
pods: map[string]*v1.PodList{
|
||||
n1NodeName: {
|
||||
Items: []v1.Pod{
|
||||
*test.BuildTestPod("p1", 400, 0, n1NodeName, setRSOwnerRef),
|
||||
*test.BuildTestPod("p2", 400, 0, n1NodeName, setRSOwnerRef),
|
||||
*test.BuildTestPod("p3", 400, 0, n1NodeName, setRSOwnerRef),
|
||||
*test.BuildTestPod("p4", 400, 0, n1NodeName, setRSOwnerRef),
|
||||
*test.BuildTestPod("p5", 400, 0, n1NodeName, setRSOwnerRef),
|
||||
// These won't be evicted.
|
||||
*test.BuildTestPod("p6", 400, 0, n1NodeName, setDSOwnerRef),
|
||||
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A pod with local storage.
|
||||
setNormalOwnerRef(pod)
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
}),
|
||||
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A Critical Pod.
|
||||
pod.Namespace = "kube-system"
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
},
|
||||
n2NodeName: {
|
||||
Items: []v1.Pod{
|
||||
*test.BuildTestPod("p9", 400, 0, n1NodeName, setRSOwnerRef),
|
||||
},
|
||||
},
|
||||
n3NodeName: {},
|
||||
},
|
||||
expectedPodsEvicted: 3,
|
||||
},
|
||||
{
|
||||
name: "with priorities",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
targetThresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 50,
|
||||
v1.ResourcePods: 50,
|
||||
},
|
||||
nodes: map[string]*v1.Node{
|
||||
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
|
||||
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
|
||||
},
|
||||
pods: map[string]*v1.PodList{
|
||||
n1NodeName: {
|
||||
Items: []v1.Pod{
|
||||
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
setHighPriority(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
setHighPriority(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
setHighPriority(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
setHighPriority(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
setLowPriority(pod)
|
||||
}),
|
||||
// These won't be evicted.
|
||||
*test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setDSOwnerRef(pod)
|
||||
setHighPriority(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A pod with local storage.
|
||||
setNormalOwnerRef(pod)
|
||||
setLowPriority(pod)
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
}),
|
||||
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A Critical Pod.
|
||||
pod.Namespace = "kube-system"
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
},
|
||||
n2NodeName: {
|
||||
Items: []v1.Pod{
|
||||
*test.BuildTestPod("p9", 400, 0, n1NodeName, setRSOwnerRef),
|
||||
},
|
||||
},
|
||||
n3NodeName: {},
|
||||
},
|
||||
expectedPodsEvicted: 3,
|
||||
},
|
||||
{
|
||||
name: "without priorities evicting best-effort pods only",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
targetThresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 50,
|
||||
v1.ResourcePods: 50,
|
||||
},
|
||||
nodes: map[string]*v1.Node{
|
||||
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
|
||||
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
|
||||
},
|
||||
// All pods are assumed to be burstable (test.BuildTestNode always sets both cpu/memory resource requests to some value)
|
||||
pods: map[string]*v1.PodList{
|
||||
n1NodeName: {
|
||||
Items: []v1.Pod{
|
||||
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
makeBestEffortPod(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
makeBestEffortPod(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
makeBestEffortPod(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setRSOwnerRef(pod)
|
||||
makeBestEffortPod(pod)
|
||||
}),
|
||||
// These won't be evicted.
|
||||
*test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
setDSOwnerRef(pod)
|
||||
}),
|
||||
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A pod with local storage.
|
||||
setNormalOwnerRef(pod)
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
}),
|
||||
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A Critical Pod.
|
||||
pod.Namespace = "kube-system"
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
},
|
||||
},
|
||||
n2NodeName: {
|
||||
Items: []v1.Pod{
|
||||
*test.BuildTestPod("p9", 400, 0, n1NodeName, setRSOwnerRef),
|
||||
},
|
||||
},
|
||||
n3NodeName: {},
|
||||
},
|
||||
expectedPodsEvicted: 4,
|
||||
evictedPods: []string{"p1", "p2", "p4", "p5"},
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range testCases {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
fakeClient := &fake.Clientset{}
|
||||
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
list := action.(core.ListAction)
|
||||
fieldString := list.GetListRestrictions().Fields.String()
|
||||
if strings.Contains(fieldString, n1NodeName) {
|
||||
return true, test.pods[n1NodeName], nil
|
||||
}
|
||||
if strings.Contains(fieldString, n2NodeName) {
|
||||
return true, test.pods[n2NodeName], nil
|
||||
}
|
||||
if strings.Contains(fieldString, n3NodeName) {
|
||||
return true, test.pods[n3NodeName], nil
|
||||
}
|
||||
return true, nil, fmt.Errorf("Failed to list: %v", list)
|
||||
})
|
||||
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
|
||||
getAction := action.(core.GetAction)
|
||||
if node, exists := test.nodes[getAction.GetName()]; exists {
|
||||
return true, node, nil
|
||||
}
|
||||
return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
|
||||
})
|
||||
podsForEviction := make(map[string]struct{})
|
||||
for _, pod := range test.evictedPods {
|
||||
podsForEviction[pod] = struct{}{}
|
||||
}
|
||||
|
||||
evictionFailed := false
|
||||
if len(test.evictedPods) > 0 {
|
||||
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
getAction := action.(core.CreateAction)
|
||||
obj := getAction.GetObject()
|
||||
if eviction, ok := obj.(*v1beta1.Eviction); ok {
|
||||
if _, exists := podsForEviction[eviction.Name]; exists {
|
||||
return true, obj, nil
|
||||
}
|
||||
evictionFailed = true
|
||||
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
|
||||
}
|
||||
return true, obj, nil
|
||||
})
|
||||
}
|
||||
|
||||
var nodes []*v1.Node
|
||||
for _, node := range test.nodes {
|
||||
nodes = append(nodes, node)
|
||||
}
|
||||
|
||||
npm := createNodePodsMap(ctx, fakeClient, nodes)
|
||||
lowNodes, targetNodes := classifyNodes(npm, test.thresholds, test.targetThresholds, false)
|
||||
if len(lowNodes) != 1 {
|
||||
t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
|
||||
}
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
"v1",
|
||||
false,
|
||||
test.expectedPodsEvicted,
|
||||
nodes,
|
||||
)
|
||||
|
||||
evictPodsFromTargetNodes(ctx, targetNodes, lowNodes, test.targetThresholds, false, podEvictor)
|
||||
podsEvicted := podEvictor.TotalEvicted()
|
||||
if test.expectedPodsEvicted != podsEvicted {
|
||||
t.Errorf("Expected %#v pods to be evicted but %#v got evicted", test.expectedPodsEvicted, podsEvicted)
|
||||
}
|
||||
if evictionFailed {
|
||||
t.Errorf("Pod evictions failed unexpectedly")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestSortPodsByPriority(t *testing.T) {
|
||||
n1 := test.BuildTestNode("n1", 4000, 3000, 9, nil)
|
||||
|
||||
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, setLowPriority)
|
||||
|
||||
// BestEffort
|
||||
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
setHighPriority(pod)
|
||||
makeBestEffortPod(pod)
|
||||
})
|
||||
|
||||
// Burstable
|
||||
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) {
|
||||
setHighPriority(pod)
|
||||
makeBurstablePod(pod)
|
||||
})
|
||||
|
||||
// Guaranteed
|
||||
p4 := test.BuildTestPod("p4", 400, 100, n1.Name, func(pod *v1.Pod) {
|
||||
setHighPriority(pod)
|
||||
makeGuaranteedPod(pod)
|
||||
})
|
||||
|
||||
// Best effort with nil priorities.
|
||||
p5 := test.BuildTestPod("p5", 400, 100, n1.Name, makeBestEffortPod)
|
||||
p5.Spec.Priority = nil
|
||||
|
||||
p6 := test.BuildTestPod("p6", 400, 100, n1.Name, makeGuaranteedPod)
|
||||
p6.Spec.Priority = nil
|
||||
|
||||
podList := []*v1.Pod{p4, p3, p2, p1, p6, p5}
|
||||
|
||||
sortPodsBasedOnPriority(podList)
|
||||
if !reflect.DeepEqual(podList[len(podList)-1], p4) {
|
||||
t.Errorf("Expected last pod in sorted list to be %v which of highest priority and guaranteed but got %v", p4, podList[len(podList)-1])
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateThresholds(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input api.ResourceThresholds
|
||||
succeed bool
|
||||
}{
|
||||
{
|
||||
name: "passing nil map for threshold",
|
||||
input: nil,
|
||||
succeed: false,
|
||||
},
|
||||
{
|
||||
name: "passing no threshold",
|
||||
input: api.ResourceThresholds{},
|
||||
succeed: false,
|
||||
},
|
||||
{
|
||||
name: "passing unsupported resource name",
|
||||
input: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 40,
|
||||
v1.ResourceStorage: 25.5,
|
||||
},
|
||||
succeed: false,
|
||||
},
|
||||
{
|
||||
name: "passing invalid resource name",
|
||||
input: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 40,
|
||||
"coolResource": 42.0,
|
||||
},
|
||||
succeed: false,
|
||||
},
|
||||
{
|
||||
name: "passing a valid threshold with cpu, memory and pods",
|
||||
input: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 20,
|
||||
v1.ResourceMemory: 30,
|
||||
v1.ResourcePods: 40,
|
||||
},
|
||||
succeed: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
isValid := validateThresholds(test.input)
|
||||
|
||||
if isValid != test.succeed {
|
||||
t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.succeed, isValid)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func newFake(objects ...runtime.Object) *core.Fake {
|
||||
scheme := runtime.NewScheme()
|
||||
codecs := serializer.NewCodecFactory(scheme)
|
||||
fake.AddToScheme(scheme)
|
||||
o := core.NewObjectTracker(scheme, codecs.UniversalDecoder())
|
||||
for _, obj := range objects {
|
||||
if err := o.Add(obj); err != nil {
|
||||
panic(err)
|
||||
}
|
||||
}
|
||||
|
||||
fakePtr := core.Fake{}
|
||||
fakePtr.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
objs, err := o.List(
|
||||
schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
|
||||
schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"},
|
||||
action.GetNamespace(),
|
||||
)
|
||||
if err != nil {
|
||||
return true, nil, err
|
||||
}
|
||||
|
||||
obj := &v1.PodList{
|
||||
Items: []v1.Pod{},
|
||||
}
|
||||
for _, pod := range objs.(*v1.PodList).Items {
|
||||
podFieldSet := fields.Set(map[string]string{
|
||||
"spec.nodeName": pod.Spec.NodeName,
|
||||
"status.phase": string(pod.Status.Phase),
|
||||
})
|
||||
match := action.(core.ListAction).GetListRestrictions().Fields.Matches(podFieldSet)
|
||||
if !match {
|
||||
continue
|
||||
}
|
||||
obj.Items = append(obj.Items, *pod.DeepCopy())
|
||||
}
|
||||
return true, obj, nil
|
||||
})
|
||||
fakePtr.AddReactor("*", "*", core.ObjectReaction(o))
|
||||
fakePtr.AddWatchReactor("*", core.DefaultWatchReactor(watch.NewFake(), nil))
|
||||
|
||||
return &fakePtr
|
||||
}
|
||||
|
||||
func TestWithTaints(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
strategy := api.DeschedulerStrategy{
|
||||
Enabled: true,
|
||||
Params: api.StrategyParameters{
|
||||
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
|
||||
Thresholds: api.ResourceThresholds{
|
||||
v1.ResourcePods: 20,
|
||||
},
|
||||
TargetThresholds: api.ResourceThresholds{
|
||||
v1.ResourcePods: 70,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
|
||||
n2 := test.BuildTestNode("n2", 1000, 3000, 10, nil)
|
||||
n3 := test.BuildTestNode("n3", 1000, 3000, 10, nil)
|
||||
n3withTaints := n3.DeepCopy()
|
||||
n3withTaints.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: "key",
|
||||
Value: "value",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
|
||||
podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name, setRSOwnerRef)
|
||||
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: "key",
|
||||
Value: "value",
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
nodes []*v1.Node
|
||||
pods []*v1.Pod
|
||||
evictionsExpected int
|
||||
}{
|
||||
{
|
||||
name: "No taints",
|
||||
nodes: []*v1.Node{n1, n2, n3},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
// Node 2 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n2.Name), 200, 0, n2.Name, setRSOwnerRef),
|
||||
},
|
||||
evictionsExpected: 1,
|
||||
},
|
||||
{
|
||||
name: "No pod tolerates node taint",
|
||||
nodes: []*v1.Node{n1, n3withTaints},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
// Node 3 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, setRSOwnerRef),
|
||||
},
|
||||
evictionsExpected: 0,
|
||||
},
|
||||
{
|
||||
name: "Pod which tolerates node taint",
|
||||
nodes: []*v1.Node{n1, n3withTaints},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
|
||||
podThatToleratesTaint,
|
||||
// Node 3 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, setRSOwnerRef),
|
||||
},
|
||||
evictionsExpected: 1,
|
||||
},
|
||||
}
|
||||
|
||||
for _, item := range tests {
|
||||
t.Run(item.name, func(t *testing.T) {
|
||||
var objs []runtime.Object
|
||||
for _, node := range item.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
|
||||
for _, pod := range item.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
|
||||
fakePtr := newFake(objs...)
|
||||
var evictionCounter int
|
||||
fakePtr.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
if action.GetSubresource() != "eviction" || action.GetResource().Resource != "pods" {
|
||||
return false, nil, nil
|
||||
}
|
||||
evictionCounter++
|
||||
return true, nil, nil
|
||||
})
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
&fake.Clientset{Fake: *fakePtr},
|
||||
"policy/v1",
|
||||
false,
|
||||
item.evictionsExpected,
|
||||
item.nodes,
|
||||
)
|
||||
|
||||
LowNodeUtilization(ctx, &fake.Clientset{Fake: *fakePtr}, strategy, item.nodes, false, podEvictor)
|
||||
|
||||
if item.evictionsExpected != evictionCounter {
|
||||
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, evictionCounter)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -18,103 +18,46 @@ package strategies
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/util/sets"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
"k8s.io/klog"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
)
|
||||
|
||||
func validatePodsViolatingNodeAffinityParams(params *api.StrategyParameters) error {
|
||||
if params == nil || len(params.NodeAffinityType) == 0 {
|
||||
return fmt.Errorf("NodeAffinityType is empty")
|
||||
}
|
||||
// At most one of include/exclude can be set
|
||||
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
|
||||
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
|
||||
}
|
||||
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
|
||||
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// RemovePodsViolatingNodeAffinity evicts pods on nodes which violate node affinity
|
||||
func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
|
||||
if err := validatePodsViolatingNodeAffinityParams(strategy.Params); err != nil {
|
||||
klog.ErrorS(err, "Invalid RemovePodsViolatingNodeAffinity parameters")
|
||||
return
|
||||
}
|
||||
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
|
||||
return
|
||||
}
|
||||
|
||||
var includedNamespaces, excludedNamespaces sets.String
|
||||
if strategy.Params.Namespaces != nil {
|
||||
includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
|
||||
excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
|
||||
}
|
||||
|
||||
nodeFit := false
|
||||
if strategy.Params != nil {
|
||||
nodeFit = strategy.Params.NodeFit
|
||||
}
|
||||
|
||||
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
|
||||
|
||||
podFilter, err := podutil.NewOptions().
|
||||
WithNamespaces(includedNamespaces).
|
||||
WithoutNamespaces(excludedNamespaces).
|
||||
WithLabelSelector(strategy.Params.LabelSelector).
|
||||
BuildFilterFunc()
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Error initializing pod filter function")
|
||||
return
|
||||
}
|
||||
|
||||
func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
|
||||
for _, nodeAffinity := range strategy.Params.NodeAffinityType {
|
||||
klog.V(2).InfoS("Executing for nodeAffinityType", "nodeAffinity", nodeAffinity)
|
||||
klog.V(2).Infof("Executing for nodeAffinityType: %v", nodeAffinity)
|
||||
|
||||
switch nodeAffinity {
|
||||
case "requiredDuringSchedulingIgnoredDuringExecution":
|
||||
for _, node := range nodes {
|
||||
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
|
||||
klog.V(1).Infof("Processing node: %#v\n", node.Name)
|
||||
|
||||
pods, err := podutil.ListPodsOnANode(
|
||||
node.Name,
|
||||
getPodsAssignedToNode,
|
||||
podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool {
|
||||
return evictable.IsEvictable(pod) &&
|
||||
!nodeutil.PodFitsCurrentNode(getPodsAssignedToNode, pod, node) &&
|
||||
nodeutil.PodFitsAnyNode(getPodsAssignedToNode, pod, nodes)
|
||||
}),
|
||||
)
|
||||
pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to get pods", "node", klog.KObj(node))
|
||||
klog.Errorf("failed to get pods from %v: %v", node.Name, err)
|
||||
}
|
||||
|
||||
for _, pod := range pods {
|
||||
if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
|
||||
klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
|
||||
if _, err := podEvictor.EvictPod(ctx, pod, node, "NodeAffinity"); err != nil {
|
||||
klog.ErrorS(err, "Error evicting pod")
|
||||
break
|
||||
if !nodeutil.PodFitsCurrentNode(pod, node) && nodeutil.PodFitsAnyNode(pod, nodes) {
|
||||
klog.V(1).Infof("Evicting pod: %v", pod.Name)
|
||||
if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil {
|
||||
klog.Errorf("Error evicting pod: (%#v)", err)
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
default:
|
||||
klog.ErrorS(nil, "Invalid nodeAffinityType", "nodeAffinity", nodeAffinity)
|
||||
klog.Errorf("invalid nodeAffinityType: %v", nodeAffinity)
|
||||
}
|
||||
}
|
||||
klog.V(1).Infof("Evicted %v pods", podEvictor.TotalEvicted())
|
||||
}
|
||||
|
||||
@@ -20,39 +20,26 @@ import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
policyv1 "k8s.io/api/policy/v1"
|
||||
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
|
||||
"k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
|
||||
core "k8s.io/client-go/testing"
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
requiredDuringSchedulingIgnoredDuringExecutionStrategy := api.DeschedulerStrategy{
|
||||
Enabled: true,
|
||||
Params: &api.StrategyParameters{
|
||||
Params: api.StrategyParameters{
|
||||
NodeAffinityType: []string{
|
||||
"requiredDuringSchedulingIgnoredDuringExecution",
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy := api.DeschedulerStrategy{
|
||||
Enabled: true,
|
||||
Params: &api.StrategyParameters{
|
||||
NodeAffinityType: []string{
|
||||
"requiredDuringSchedulingIgnoredDuringExecution",
|
||||
},
|
||||
NodeFit: true,
|
||||
},
|
||||
}
|
||||
|
||||
nodeLabelKey := "kubernetes.io/desiredNode"
|
||||
nodeLabelValue := "yes"
|
||||
nodeWithLabels := test.BuildTestNode("nodeWithLabels", 2000, 3000, 10, nil)
|
||||
@@ -61,10 +48,10 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
|
||||
nodeWithoutLabels := test.BuildTestNode("nodeWithoutLabels", 2000, 3000, 10, nil)
|
||||
|
||||
unschedulableNodeWithLabels := test.BuildTestNode("unschedulableNodeWithLabels", 2000, 3000, 10, nil)
|
||||
unschedulableNodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue
|
||||
nodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue
|
||||
unschedulableNodeWithLabels.Spec.Unschedulable = true
|
||||
|
||||
addPodsToNode := func(node *v1.Node, deletionTimestamp *metav1.Time) []*v1.Pod {
|
||||
addPodsToNode := func(node *v1.Node) []v1.Pod {
|
||||
podWithNodeAffinity := test.BuildTestPod("podWithNodeAffinity", 100, 0, node.Name, nil)
|
||||
podWithNodeAffinity.Spec.Affinity = &v1.Affinity{
|
||||
NodeAffinity: &v1.NodeAffinity{
|
||||
@@ -93,148 +80,89 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
|
||||
pod1.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
pod2.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
|
||||
|
||||
pod1.DeletionTimestamp = deletionTimestamp
|
||||
pod2.DeletionTimestamp = deletionTimestamp
|
||||
|
||||
return []*v1.Pod{
|
||||
podWithNodeAffinity,
|
||||
pod1,
|
||||
pod2,
|
||||
return []v1.Pod{
|
||||
*podWithNodeAffinity,
|
||||
*pod1,
|
||||
*pod2,
|
||||
}
|
||||
}
|
||||
|
||||
var uint1 uint = 1
|
||||
tests := []struct {
|
||||
description string
|
||||
nodes []*v1.Node
|
||||
pods []*v1.Pod
|
||||
strategy api.DeschedulerStrategy
|
||||
expectedEvictedPodCount uint
|
||||
maxPodsToEvictPerNode *uint
|
||||
maxNoOfPodsToEvictPerNamespace *uint
|
||||
description string
|
||||
nodes []*v1.Node
|
||||
pods []v1.Pod
|
||||
strategy api.DeschedulerStrategy
|
||||
expectedEvictedPodCount int
|
||||
maxPodsToEvict int
|
||||
}{
|
||||
{
|
||||
description: "Invalid strategy type, should not evict any pods",
|
||||
strategy: api.DeschedulerStrategy{
|
||||
Enabled: true,
|
||||
Params: &api.StrategyParameters{
|
||||
Params: api.StrategyParameters{
|
||||
NodeAffinityType: []string{
|
||||
"requiredDuringSchedulingRequiredDuringExecution",
|
||||
},
|
||||
},
|
||||
},
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: addPodsToNode(nodeWithoutLabels, nil),
|
||||
pods: addPodsToNode(nodeWithoutLabels),
|
||||
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Pod is correctly scheduled on node, no eviction expected",
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
|
||||
expectedEvictedPodCount: 0,
|
||||
pods: addPodsToNode(nodeWithLabels, nil),
|
||||
pods: addPodsToNode(nodeWithLabels),
|
||||
nodes: []*v1.Node{nodeWithLabels},
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Pod is scheduled on node without matching labels, another schedulable node available, should be evicted",
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
|
||||
pods: addPodsToNode(nodeWithoutLabels, nil),
|
||||
pods: addPodsToNode(nodeWithoutLabels),
|
||||
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
{
|
||||
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, should not be evicted",
|
||||
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvict set to 1, should not be evicted",
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
|
||||
pods: addPodsToNode(nodeWithoutLabels, nil),
|
||||
pods: addPodsToNode(nodeWithoutLabels),
|
||||
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
|
||||
maxPodsToEvictPerNode: &uint1,
|
||||
},
|
||||
{
|
||||
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, no pod evicted since pod terminating",
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
|
||||
pods: addPodsToNode(nodeWithoutLabels, &metav1.Time{}),
|
||||
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
|
||||
maxPodsToEvictPerNode: &uint1,
|
||||
},
|
||||
{
|
||||
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxNoOfPodsToEvictPerNamespace set to 1, should not be evicted",
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
|
||||
pods: addPodsToNode(nodeWithoutLabels, nil),
|
||||
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
|
||||
maxNoOfPodsToEvictPerNamespace: &uint1,
|
||||
},
|
||||
{
|
||||
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxNoOfPodsToEvictPerNamespace set to 1, no pod evicted since pod terminating",
|
||||
expectedEvictedPodCount: 1,
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
|
||||
pods: addPodsToNode(nodeWithoutLabels, &metav1.Time{}),
|
||||
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
|
||||
maxNoOfPodsToEvictPerNamespace: &uint1,
|
||||
maxPodsToEvict: 1,
|
||||
},
|
||||
{
|
||||
description: "Pod is scheduled on node without matching labels, but no node where pod fits is available, should not evict",
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy,
|
||||
pods: addPodsToNode(nodeWithoutLabels, nil),
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
|
||||
pods: addPodsToNode(nodeWithoutLabels),
|
||||
nodes: []*v1.Node{nodeWithoutLabels, unschedulableNodeWithLabels},
|
||||
},
|
||||
{
|
||||
description: "Pod is scheduled on node without matching labels, and node where pod fits is available, should evict",
|
||||
expectedEvictedPodCount: 0,
|
||||
strategy: requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy,
|
||||
pods: addPodsToNode(nodeWithoutLabels, nil),
|
||||
nodes: []*v1.Node{nodeWithLabels, unschedulableNodeWithLabels},
|
||||
maxPodsToEvictPerNode: &uint1,
|
||||
maxPodsToEvict: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.description, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range tc.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range tc.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
policyv1.SchemeGroupVersion.String(),
|
||||
false,
|
||||
tc.maxPodsToEvictPerNode,
|
||||
tc.maxNoOfPodsToEvictPerNamespace,
|
||||
tc.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
|
||||
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)
|
||||
actualEvictedPodCount := podEvictor.TotalEvicted()
|
||||
if actualEvictedPodCount != tc.expectedEvictedPodCount {
|
||||
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
|
||||
}
|
||||
fakeClient := &fake.Clientset{}
|
||||
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
return true, &v1.PodList{Items: tc.pods}, nil
|
||||
})
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
"v1",
|
||||
false,
|
||||
tc.maxPodsToEvict,
|
||||
tc.nodes,
|
||||
)
|
||||
|
||||
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, false, podEvictor)
|
||||
actualEvictedPodCount := podEvictor.TotalEvicted()
|
||||
if actualEvictedPodCount != tc.expectedEvictedPodCount {
|
||||
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -18,95 +18,22 @@ package strategies

import (
    "context"
    "fmt"

    "k8s.io/apimachinery/pkg/util/sets"
    "sigs.k8s.io/descheduler/pkg/api"
    "sigs.k8s.io/descheduler/pkg/descheduler/evictions"
    podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
    "sigs.k8s.io/descheduler/pkg/utils"

    v1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    clientset "k8s.io/client-go/kubernetes"
    "k8s.io/klog/v2"
    "k8s.io/klog"
)

func validateRemovePodsViolatingNodeTaintsParams(params *api.StrategyParameters) error {
    if params == nil {
        return nil
    }

    // At most one of include/exclude can be set
    if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
        return fmt.Errorf("only one of Include/Exclude namespaces can be set")
    }
    if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
        return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
    }

    return nil
}

// RemovePodsViolatingNodeTaints evicts pods on the node which violate NoSchedule Taints on nodes
func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
    if err := validateRemovePodsViolatingNodeTaintsParams(strategy.Params); err != nil {
        klog.ErrorS(err, "Invalid RemovePodsViolatingNodeTaints parameters")
        return
    }

    var includedNamespaces, excludedNamespaces, excludedTaints sets.String
    var labelSelector *metav1.LabelSelector
    if strategy.Params != nil {
        if strategy.Params.Namespaces != nil {
            includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
            excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
        }
        if strategy.Params.ExcludedTaints != nil {
            excludedTaints = sets.NewString(strategy.Params.ExcludedTaints...)
        }
        labelSelector = strategy.Params.LabelSelector
    }

    thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
    if err != nil {
        klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
        return
    }

    nodeFit := false
    if strategy.Params != nil {
        nodeFit = strategy.Params.NodeFit
    }

    evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))

    podFilter, err := podutil.NewOptions().
        WithFilter(evictable.IsEvictable).
        WithNamespaces(includedNamespaces).
        WithoutNamespaces(excludedNamespaces).
        WithLabelSelector(labelSelector).
        BuildFilterFunc()
    if err != nil {
        klog.ErrorS(err, "Error initializing pod filter function")
        return
    }

    excludeTaint := func(taint *v1.Taint) bool {
        // Exclude taints by key *or* key=value
        return excludedTaints.Has(taint.Key) || (taint.Value != "" && excludedTaints.Has(fmt.Sprintf("%s=%s", taint.Key, taint.Value)))
    }

    taintFilterFnc := func(taint *v1.Taint) bool { return (taint.Effect == v1.TaintEffectNoSchedule) && !excludeTaint(taint) }
    if strategy.Params != nil && strategy.Params.IncludePreferNoSchedule {
        taintFilterFnc = func(taint *v1.Taint) bool {
            return (taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectPreferNoSchedule) && !excludeTaint(taint)
        }
    }

func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
    for _, node := range nodes {
        klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
        pods, err := podutil.ListAllPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
        klog.V(1).Infof("Processing node: %#v\n", node.Name)
        pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods)
        if err != nil {
            //no pods evicted as error encountered retrieving evictable Pods
            return
@@ -116,11 +43,11 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
            if !utils.TolerationsTolerateTaintsWithFilter(
                pods[i].Spec.Tolerations,
                node.Spec.Taints,
                taintFilterFnc,
                func(taint *v1.Taint) bool { return taint.Effect == v1.TaintEffectNoSchedule },
            ) {
                klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
                if _, err := podEvictor.EvictPod(ctx, pods[i], node, "NodeTaint"); err != nil {
                    klog.ErrorS(err, "Error evicting pod")
                klog.V(2).Infof("Not all taints with NoSchedule effect are tolerated after update for pod %v on node %v", pods[i].Name, node.Name)
                if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil {
                    klog.Errorf("Error evicting pod: (%#v)", err)
                    break
                }
            }

@@ -5,16 +5,13 @@ import (
    "fmt"
    "testing"

    v1 "k8s.io/api/core/v1"
    policyv1 "k8s.io/api/policy/v1"
    "k8s.io/api/core/v1"
    "k8s.io/apimachinery/pkg/api/resource"
    "k8s.io/apimachinery/pkg/runtime"
    "k8s.io/client-go/informers"
    "k8s.io/client-go/kubernetes/fake"

    core "k8s.io/client-go/testing"
    "sigs.k8s.io/descheduler/pkg/api"
    "sigs.k8s.io/descheduler/pkg/descheduler/evictions"
    podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
    "sigs.k8s.io/descheduler/pkg/utils"
    "sigs.k8s.io/descheduler/test"
)
@@ -27,14 +24,6 @@ func createNoScheduleTaint(key, value string, index int) v1.Taint {
    }
}

func createPreferNoScheduleTaint(key, value string, index int) v1.Taint {
    return v1.Taint{
        Key: "testTaint" + fmt.Sprintf("%v", index),
        Value: "test" + fmt.Sprintf("%v", index),
        Effect: v1.TaintEffectPreferNoSchedule,
    }
}

func addTaintsToNode(node *v1.Node, key, value string, indices []int) *v1.Node {
    taints := []v1.Taint{}
    for _, index := range indices {
@@ -44,42 +33,22 @@ func addTaintsToNode(node *v1.Node, key, value string, indices []int) *v1.Node {
    return node
}

func addTolerationToPod(pod *v1.Pod, key, value string, index int, effect v1.TaintEffect) *v1.Pod {
func addTolerationToPod(pod *v1.Pod, key, value string, index int) *v1.Pod {
    if pod.Annotations == nil {
        pod.Annotations = map[string]string{}
    }

    pod.Spec.Tolerations = []v1.Toleration{{Key: key + fmt.Sprintf("%v", index), Value: value + fmt.Sprintf("%v", index), Effect: effect}}
    pod.Spec.Tolerations = []v1.Toleration{{Key: key + fmt.Sprintf("%v", index), Value: value + fmt.Sprintf("%v", index), Effect: v1.TaintEffectNoSchedule}}

    return pod
}

func TestDeletePodsViolatingNodeTaints(t *testing.T) {
    ctx := context.Background()
    node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
    node1 = addTaintsToNode(node1, "testTaint", "test", []int{1})
    node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
    node2 = addTaintsToNode(node2, "testingTaint", "testing", []int{1})

    node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
        node.ObjectMeta.Labels = map[string]string{
            "datacenter": "east",
        }
    })
    node4 := test.BuildTestNode("n4", 2000, 3000, 10, func(node *v1.Node) {
        node.Spec = v1.NodeSpec{
            Unschedulable: true,
        }
    })

    node5 := test.BuildTestNode("n5", 2000, 3000, 10, nil)
    node5.Spec.Taints = []v1.Taint{
        createPreferNoScheduleTaint("testTaint", "test", 1),
    }

    node6 := test.BuildTestNode("n6", 1, 1, 1, nil)
    node6.Spec.Taints = []v1.Taint{
        createPreferNoScheduleTaint("testTaint", "test", 1),
    }
    node1 = addTaintsToNode(node2, "testingTaint", "testing", []int{1})

    p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
    p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
@@ -100,15 +69,12 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
    p10 := test.BuildTestPod("p10", 100, 0, node2.Name, nil)
    p11 := test.BuildTestPod("p11", 100, 0, node2.Name, nil)
    p11.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
    p12 := test.BuildTestPod("p11", 100, 0, node2.Name, nil)
    p12.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()

    // The following 4 pods won't get evicted.
    // A Critical Pod.
    p7.Namespace = "kube-system"
    priority := utils.SystemCriticalPriority
    p7.Spec.Priority = &priority
    p7.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()

    // A daemonset.
    p8.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
@@ -127,240 +93,92 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
    // A Mirror Pod.
    p10.Annotations = test.GetMirrorPodAnnotation()

    p1 = addTolerationToPod(p1, "testTaint", "test", 1, v1.TaintEffectNoSchedule)
    p3 = addTolerationToPod(p3, "testTaint", "test", 1, v1.TaintEffectNoSchedule)
    p4 = addTolerationToPod(p4, "testTaintX", "testX", 1, v1.TaintEffectNoSchedule)

    p12.Spec.NodeSelector = map[string]string{
        "datacenter": "west",
    }

    p13 := test.BuildTestPod("p13", 100, 0, node5.Name, nil)
    p13.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
    // node5 has PreferNoSchedule:testTaint1=test1, so the p13 has to have
    // PreferNoSchedule:testTaint0=test0 so the pod is not tolarated
    p13 = addTolerationToPod(p13, "testTaint", "test", 0, v1.TaintEffectPreferNoSchedule)

    var uint1 uint = 1
    p1 = addTolerationToPod(p1, "testTaint", "test", 1)
    p3 = addTolerationToPod(p3, "testTaint", "test", 1)
    p4 = addTolerationToPod(p4, "testTaintX", "testX", 1)

    tests := []struct {
        description string
        nodes []*v1.Node
        pods []*v1.Pod
        evictLocalStoragePods bool
        evictSystemCriticalPods bool
        maxPodsToEvictPerNode *uint
        maxNoOfPodsToEvictPerNamespace *uint
        expectedEvictedPodCount uint
        nodeFit bool
        includePreferNoSchedule bool
        excludedTaints []string
        description string
        nodes []*v1.Node
        pods []v1.Pod
        evictLocalStoragePods bool
        maxPodsToEvict int
        expectedEvictedPodCount int
    }{

        {
            description: "Pods not tolerating node taint should be evicted",
            pods: []*v1.Pod{p1, p2, p3},
            pods: []v1.Pod{*p1, *p2, *p3},
            nodes: []*v1.Node{node1},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            maxPodsToEvict: 0,
            expectedEvictedPodCount: 1, //p2 gets evicted
        },
        {
            description: "Pods with tolerations but not tolerating node taint should be evicted",
            pods: []*v1.Pod{p1, p3, p4},
            pods: []v1.Pod{*p1, *p3, *p4},
            nodes: []*v1.Node{node1},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            maxPodsToEvict: 0,
            expectedEvictedPodCount: 1, //p4 gets evicted
        },
        {
            description: "Only <maxPodsToEvictPerNode> number of Pods not tolerating node taint should be evicted",
            pods: []*v1.Pod{p1, p5, p6},
            description: "Only <maxPodsToEvict> number of Pods not tolerating node taint should be evicted",
            pods: []v1.Pod{*p1, *p5, *p6},
            nodes: []*v1.Node{node1},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            maxPodsToEvictPerNode: &uint1,
            maxPodsToEvict: 1,
            expectedEvictedPodCount: 1, //p5 or p6 gets evicted
        },
        {
            description: "Only <maxNoOfPodsToEvictPerNamespace> number of Pods not tolerating node taint should be evicted",
            pods: []*v1.Pod{p1, p5, p6},
            nodes: []*v1.Node{node1},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            maxNoOfPodsToEvictPerNamespace: &uint1,
            expectedEvictedPodCount: 1, //p5 or p6 gets evicted
        },
        {
            description: "Critical pods not tolerating node taint should not be evicted",
            pods: []*v1.Pod{p7, p8, p9, p10},
            pods: []v1.Pod{*p7, *p8, *p9, *p10},
            nodes: []*v1.Node{node2},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            expectedEvictedPodCount: 0, //nothing is evicted
        },
        {
            description: "Critical pods except storage pods not tolerating node taint should not be evicted",
            pods: []*v1.Pod{p7, p8, p9, p10},
            nodes: []*v1.Node{node2},
            evictLocalStoragePods: true,
            evictSystemCriticalPods: false,
            expectedEvictedPodCount: 1, //p9 gets evicted
        },
        {
            description: "Critical and non critical pods, only non critical pods not tolerating node taint should be evicted",
            pods: []*v1.Pod{p7, p8, p10, p11},
            nodes: []*v1.Node{node2},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            expectedEvictedPodCount: 1, //p11 gets evicted
        },
        {
            description: "Critical and non critical pods, pods not tolerating node taint should be evicted even if they are critical",
            pods: []*v1.Pod{p2, p7, p9, p10},
            nodes: []*v1.Node{node1, node2},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: true,
            expectedEvictedPodCount: 2, //p2 and p7 are evicted
        },
        {
            description: "Pod p2 doesn't tolerate taint on it's node, but also doesn't tolerate taints on other nodes",
            pods: []*v1.Pod{p1, p2, p3},
            nodes: []*v1.Node{node1, node2},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            expectedEvictedPodCount: 0, //p2 gets evicted
            nodeFit: true,
        },
        {
            description: "Pod p12 doesn't tolerate taint on it's node, but other nodes don't match it's selector",
            pods: []*v1.Pod{p1, p3, p12},
            nodes: []*v1.Node{node1, node3},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            expectedEvictedPodCount: 0, //p2 gets evicted
            nodeFit: true,
        },
        {
            description: "Pod p2 doesn't tolerate taint on it's node, but other nodes are unschedulable",
            pods: []*v1.Pod{p1, p2, p3},
            nodes: []*v1.Node{node1, node4},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            expectedEvictedPodCount: 0, //p2 gets evicted
            nodeFit: true,
        },
        {
            description: "Pods not tolerating PreferNoSchedule node taint should not be evicted when not enabled",
            pods: []*v1.Pod{p13},
            nodes: []*v1.Node{node5},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            maxPodsToEvict: 0,
            expectedEvictedPodCount: 0,
        },
        {
            description: "Pods not tolerating PreferNoSchedule node taint should be evicted when enabled",
            pods: []*v1.Pod{p13},
            nodes: []*v1.Node{node5},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            includePreferNoSchedule: true,
            expectedEvictedPodCount: 1, // p13 gets evicted
            description: "Critical pods except storage pods not tolerating node taint should not be evicted",
            pods: []v1.Pod{*p7, *p8, *p9, *p10},
            nodes: []*v1.Node{node2},
            evictLocalStoragePods: true,
            maxPodsToEvict: 0,
            expectedEvictedPodCount: 1,
        },
        {
            description: "Pods not tolerating excluded node taints (by key) should not be evicted",
            pods: []*v1.Pod{p2},
            nodes: []*v1.Node{node1},
            description: "Critical and non critical pods, only non critical pods not tolerating node taint should be evicted",
            pods: []v1.Pod{*p7, *p8, *p10, *p11},
            nodes: []*v1.Node{node2},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            excludedTaints: []string{"excludedTaint1", "testTaint1"},
            expectedEvictedPodCount: 0, // nothing gets evicted, as one of the specified excludedTaints matches the key of node1's taint
        },
        {
            description: "Pods not tolerating excluded node taints (by key and value) should not be evicted",
            pods: []*v1.Pod{p2},
            nodes: []*v1.Node{node1},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            excludedTaints: []string{"testTaint1=test1"},
            expectedEvictedPodCount: 0, // nothing gets evicted, as both the key and value of the excluded taint match node1's taint
        },
        {
            description: "The excluded taint matches the key of node1's taint, but does not match the value",
            pods: []*v1.Pod{p2},
            nodes: []*v1.Node{node1},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: false,
            excludedTaints: []string{"testTaint1=test2"},
            expectedEvictedPodCount: 1, // pod gets evicted, as excluded taint value does not match node1's taint value
        },
        {
            description: "Critical and non critical pods, pods not tolerating node taint can't be evicted because the only available node does not have enough resources.",
            pods: []*v1.Pod{p2, p7, p9, p10},
            nodes: []*v1.Node{node1, node6},
            evictLocalStoragePods: false,
            evictSystemCriticalPods: true,
            expectedEvictedPodCount: 0, //p2 and p7 can't be evicted
            nodeFit: true,
            maxPodsToEvict: 0,
            expectedEvictedPodCount: 1,
        },
    }

    for _, tc := range tests {
        t.Run(tc.description, func(t *testing.T) {

            ctx, cancel := context.WithCancel(context.Background())
            defer cancel()

            var objs []runtime.Object
            for _, node := range tc.nodes {
                objs = append(objs, node)
            }
            for _, pod := range tc.pods {
                objs = append(objs, pod)
            }
            fakeClient := fake.NewSimpleClientset(objs...)

            sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
            podInformer := sharedInformerFactory.Core().V1().Pods()

            getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
            if err != nil {
                t.Errorf("Build get pods assigned to node function error: %v", err)
            }

            sharedInformerFactory.Start(ctx.Done())
            sharedInformerFactory.WaitForCacheSync(ctx.Done())

            podEvictor := evictions.NewPodEvictor(
                fakeClient,
                policyv1.SchemeGroupVersion.String(),
                false,
                tc.maxPodsToEvictPerNode,
                tc.maxNoOfPodsToEvictPerNamespace,
                tc.nodes,
                getPodsAssignedToNode,
                tc.evictLocalStoragePods,
                tc.evictSystemCriticalPods,
                false,
                false,
                false,
            )

            strategy := api.DeschedulerStrategy{
                Params: &api.StrategyParameters{
                    NodeFit: tc.nodeFit,
                    IncludePreferNoSchedule: tc.includePreferNoSchedule,
                    ExcludedTaints: tc.excludedTaints,
                },
            }

            RemovePodsViolatingNodeTaints(ctx, fakeClient, strategy, tc.nodes, podEvictor, getPodsAssignedToNode)
            actualEvictedPodCount := podEvictor.TotalEvicted()
            if actualEvictedPodCount != tc.expectedEvictedPodCount {
                t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount)
            }
            // create fake client
            fakeClient := &fake.Clientset{}
            fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
                return true, &v1.PodList{Items: tc.pods}, nil
            })

            podEvictor := evictions.NewPodEvictor(
                fakeClient,
                "v1",
                false,
                tc.maxPodsToEvict,
                tc.nodes,
            )

            RemovePodsViolatingNodeTaints(ctx, fakeClient, api.DeschedulerStrategy{}, tc.nodes, tc.evictLocalStoragePods, podEvictor)
            actualEvictedPodCount := podEvictor.TotalEvicted()
            if actualEvictedPodCount != tc.expectedEvictedPodCount {
                t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount)
            }
    }

}

func TestToleratesTaint(t *testing.T) {

@@ -1,171 +0,0 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodeutilization
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
clientset "k8s.io/client-go/kubernetes"
|
||||
"k8s.io/klog/v2"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
)
|
||||
|
||||
// HighNodeUtilization evicts pods from under utilized nodes so that scheduler can schedule according to its strategy.
|
||||
// Note that CPU/Memory requests are used to calculate nodes' utilization and not the actual resource usage.
|
||||
func HighNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
|
||||
if err := validateNodeUtilizationParams(strategy.Params); err != nil {
|
||||
klog.ErrorS(err, "Invalid HighNodeUtilization parameters")
|
||||
return
|
||||
}
|
||||
|
||||
nodeFit := false
|
||||
if strategy.Params != nil {
|
||||
nodeFit = strategy.Params.NodeFit
|
||||
}
|
||||
|
||||
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
|
||||
if err != nil {
|
||||
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
|
||||
return
|
||||
}
|
||||
|
||||
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
|
||||
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
|
||||
if err := validateHighUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
|
||||
klog.ErrorS(err, "HighNodeUtilization config is not valid")
|
||||
return
|
||||
}
|
||||
targetThresholds = make(api.ResourceThresholds)
|
||||
|
||||
setDefaultForThresholds(thresholds, targetThresholds)
|
||||
resourceNames := getResourceNames(targetThresholds)
|
||||
|
||||
sourceNodes, highNodes := classifyNodes(
|
||||
getNodeUsage(nodes, resourceNames, getPodsAssignedToNode),
|
||||
getNodeThresholds(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode, false),
|
||||
func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
|
||||
return isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
|
||||
},
|
||||
func(node *v1.Node, usage NodeUsage, threshold NodeThresholds) bool {
|
||||
if nodeutil.IsNodeUnschedulable(node) {
|
||||
klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(node))
|
||||
return false
|
||||
}
|
||||
return !isNodeWithLowUtilization(usage, threshold.lowResourceThreshold)
|
||||
})
|
||||
|
||||
// log message in one line
|
||||
keysAndValues := []interface{}{
|
||||
"CPU", thresholds[v1.ResourceCPU],
|
||||
"Mem", thresholds[v1.ResourceMemory],
|
||||
"Pods", thresholds[v1.ResourcePods],
|
||||
}
|
||||
for name := range thresholds {
|
||||
if !nodeutil.IsBasicResource(name) {
|
||||
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
|
||||
}
|
||||
}
|
||||
|
||||
klog.V(1).InfoS("Criteria for a node below target utilization", keysAndValues...)
|
||||
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(sourceNodes))
|
||||
|
||||
if len(sourceNodes) == 0 {
|
||||
klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
|
||||
return
|
||||
}
|
||||
if len(sourceNodes) <= strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
|
||||
klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
|
||||
return
|
||||
}
|
||||
if len(sourceNodes) == len(nodes) {
|
||||
klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
|
||||
return
|
||||
}
|
||||
if len(highNodes) == 0 {
|
||||
klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
|
||||
return
|
||||
}
|
||||
|
||||
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
|
||||
|
||||
// stop if the total available usage has dropped to zero - no more pods can be scheduled
|
||||
continueEvictionCond := func(nodeInfo NodeInfo, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
|
||||
for name := range totalAvailableUsage {
|
||||
if totalAvailableUsage[name].CmpInt64(0) < 1 {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// Sort the nodes by the usage in ascending order
|
||||
sortNodesByUsage(sourceNodes, true)
|
||||
|
||||
evictPodsFromSourceNodes(
|
||||
ctx,
|
||||
sourceNodes,
|
||||
highNodes,
|
||||
podEvictor,
|
||||
evictable.IsEvictable,
|
||||
resourceNames,
|
||||
"HighNodeUtilization",
|
||||
continueEvictionCond)
|
||||
|
||||
}
|
||||
|
||||
func validateHighUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
|
||||
if targetThresholds != nil {
|
||||
return fmt.Errorf("targetThresholds is not applicable for HighNodeUtilization")
|
||||
}
|
||||
if err := validateThresholds(thresholds); err != nil {
|
||||
return fmt.Errorf("thresholds config is not valid: %v", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds) {
|
||||
// check if Pods/CPU/Mem are set, if not, set them to 100
|
||||
if _, ok := thresholds[v1.ResourcePods]; !ok {
|
||||
thresholds[v1.ResourcePods] = MaxResourcePercentage
|
||||
}
|
||||
if _, ok := thresholds[v1.ResourceCPU]; !ok {
|
||||
thresholds[v1.ResourceCPU] = MaxResourcePercentage
|
||||
}
|
||||
if _, ok := thresholds[v1.ResourceMemory]; !ok {
|
||||
thresholds[v1.ResourceMemory] = MaxResourcePercentage
|
||||
}
|
||||
|
||||
// Default targetThreshold resource values to 100
|
||||
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
|
||||
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
|
||||
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
|
||||
|
||||
for name := range thresholds {
|
||||
if !nodeutil.IsBasicResource(name) {
|
||||
targetThresholds[name] = MaxResourcePercentage
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,731 +0,0 @@
|
||||
/*
|
||||
Copyright 2021 The Kubernetes Authors.
|
||||
|
||||
Licensed under the Apache License, Version 2.0 (the "License");
|
||||
you may not use this file except in compliance with the License.
|
||||
You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing, software
|
||||
distributed under the License is distributed on an "AS IS" BASIS,
|
||||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
See the License for the specific language governing permissions and
|
||||
limitations under the License.
|
||||
*/
|
||||
|
||||
package nodeutilization
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
|
||||
v1 "k8s.io/api/core/v1"
|
||||
"k8s.io/api/policy/v1beta1"
|
||||
"k8s.io/apimachinery/pkg/api/resource"
|
||||
"k8s.io/apimachinery/pkg/runtime"
|
||||
"k8s.io/client-go/informers"
|
||||
"k8s.io/client-go/kubernetes/fake"
|
||||
core "k8s.io/client-go/testing"
|
||||
|
||||
"sigs.k8s.io/descheduler/pkg/api"
|
||||
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
|
||||
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
|
||||
"sigs.k8s.io/descheduler/pkg/utils"
|
||||
"sigs.k8s.io/descheduler/test"
|
||||
)
|
||||
|
||||
func TestHighNodeUtilization(t *testing.T) {
|
||||
n1NodeName := "n1"
|
||||
n2NodeName := "n2"
|
||||
n3NodeName := "n3"
|
||||
|
||||
nodeSelectorKey := "datacenter"
|
||||
nodeSelectorValue := "west"
|
||||
|
||||
testCases := []struct {
|
||||
name string
|
||||
thresholds api.ResourceThresholds
|
||||
nodes []*v1.Node
|
||||
pods []*v1.Pod
|
||||
expectedPodsEvicted uint
|
||||
evictedPods []string
|
||||
}{
|
||||
{
|
||||
name: "no node below threshold usage",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 20,
|
||||
v1.ResourcePods: 20,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p8", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p9", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
{
|
||||
name: "no evictable pods",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 40,
|
||||
v1.ResourcePods: 40,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A pod with local storage.
|
||||
test.SetNormalOwnerRef(pod)
|
||||
pod.Spec.Volumes = []v1.Volume{
|
||||
{
|
||||
Name: "sample",
|
||||
VolumeSource: v1.VolumeSource{
|
||||
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
|
||||
EmptyDir: &v1.EmptyDirVolumeSource{
|
||||
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
|
||||
},
|
||||
},
|
||||
}
|
||||
// A Mirror Pod.
|
||||
pod.Annotations = test.GetMirrorPodAnnotation()
|
||||
}),
|
||||
test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A Critical Pod.
|
||||
pod.Namespace = "kube-system"
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p8", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
{
|
||||
name: "no node to schedule evicted pods",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 20,
|
||||
v1.ResourcePods: 20,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
// These can't be evicted.
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
// These can't be evicted.
|
||||
test.BuildTestPod("p2", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
{
|
||||
name: "without priorities",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
// A Critical Pod.
|
||||
pod.Namespace = "kube-system"
|
||||
priority := utils.SystemCriticalPriority
|
||||
pod.Spec.Priority = &priority
|
||||
}),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 2,
|
||||
evictedPods: []string{"p1", "p7"},
|
||||
},
|
||||
{
|
||||
name: "without priorities stop when resource capacity is depleted",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 2000, 3000, 10, nil),
|
||||
test.BuildTestNode(n2NodeName, 2000, 3000, 10, nil),
|
||||
test.BuildTestNode(n3NodeName, 2000, 3000, 10, test.SetNodeUnschedulable),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p2", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 400, 0, n3NodeName, test.SetRSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 1,
|
||||
},
|
||||
{
|
||||
name: "with priorities",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n2NodeName, 2000, 3000, 10, nil),
|
||||
test.BuildTestNode(n3NodeName, 2000, 3000, 10, test.SetNodeUnschedulable),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodPriority(pod, lowPriority)
|
||||
}),
|
||||
test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodPriority(pod, highPriority)
|
||||
}),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p7", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p8", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p9", 400, 0, n3NodeName, test.SetDSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 1,
|
||||
evictedPods: []string{"p1"},
|
||||
},
|
||||
{
|
||||
name: "without priorities evicting best-effort pods only",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 3000, 3000, 10, nil),
|
||||
test.BuildTestNode(n2NodeName, 3000, 3000, 5, nil),
|
||||
test.BuildTestNode(n3NodeName, 3000, 3000, 10, test.SetNodeUnschedulable),
|
||||
},
|
||||
// All pods are assumed to be burstable (test.BuildTestNode always sets both cpu/memory resource requests to some value)
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.MakeBestEffortPod(pod)
|
||||
}),
|
||||
test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
}),
|
||||
// These won't be evicted.
|
||||
test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 1,
|
||||
evictedPods: []string{"p1"},
|
||||
},
|
||||
{
|
||||
name: "with extended resource",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 20,
|
||||
extendedResource: 40,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
|
||||
test.SetNodeExtendedResource(node, extendedResource, 8)
|
||||
}),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) {
|
||||
test.SetNodeExtendedResource(node, extendedResource, 8)
|
||||
}),
|
||||
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
|
||||
}),
|
||||
test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
|
||||
}),
|
||||
// These won't be evicted
|
||||
test.BuildTestPod("p3", 500, 0, n2NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
|
||||
}),
|
||||
test.BuildTestPod("p4", 500, 0, n2NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
|
||||
}),
|
||||
test.BuildTestPod("p5", 500, 0, n2NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
|
||||
}),
|
||||
test.BuildTestPod("p6", 500, 0, n2NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
|
||||
}),
|
||||
},
|
||||
expectedPodsEvicted: 2,
|
||||
evictedPods: []string{"p1", "p2"},
|
||||
},
|
||||
{
|
||||
name: "with extended resource in some of nodes",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 40,
|
||||
extendedResource: 40,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
|
||||
test.SetNodeExtendedResource(node, extendedResource, 8)
|
||||
}),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
//These won't be evicted
|
||||
test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
|
||||
}),
|
||||
test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
|
||||
test.SetRSOwnerRef(pod)
|
||||
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
|
||||
}),
|
||||
test.BuildTestPod("p3", 500, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 500, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p5", 500, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p6", 500, 0, n2NodeName, test.SetRSOwnerRef),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
{
|
||||
name: "Other node match pod node selector",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 9, func(node *v1.Node) {
|
||||
node.ObjectMeta.Labels = map[string]string{
|
||||
nodeSelectorKey: nodeSelectorValue,
|
||||
}
|
||||
}),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n2NodeName, func(pod *v1.Pod) {
|
||||
// A pod selecting nodes in the "west" datacenter
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeSelectorKey: nodeSelectorValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
expectedPodsEvicted: 1,
|
||||
},
|
||||
{
|
||||
name: "Other node does not match pod node selector",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 0, n2NodeName, func(pod *v1.Pod) {
|
||||
// A pod selecting nodes in the "west" datacenter
|
||||
test.SetRSOwnerRef(pod)
|
||||
pod.Spec.NodeSelector = map[string]string{
|
||||
nodeSelectorKey: nodeSelectorValue,
|
||||
}
|
||||
}),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
{
|
||||
name: "Other node does not have enough Memory",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 200, 9, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) {
|
||||
// A pod requesting more memory than is available on node1
|
||||
test.SetRSOwnerRef(pod)
|
||||
}),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
{
|
||||
name: "Other node does not have enough Memory",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 30,
|
||||
v1.ResourcePods: 30,
|
||||
},
|
||||
nodes: []*v1.Node{
|
||||
test.BuildTestNode(n1NodeName, 4000, 200, 9, nil),
|
||||
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
|
||||
},
|
||||
pods: []*v1.Pod{
|
||||
test.BuildTestPod("p1", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p2", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p3", 400, 50, n1NodeName, test.SetRSOwnerRef),
|
||||
test.BuildTestPod("p4", 400, 50, n1NodeName, test.SetDSOwnerRef),
|
||||
test.BuildTestPod("p5", 400, 100, n2NodeName, func(pod *v1.Pod) {
|
||||
// A pod requesting more memory than is available on node1
|
||||
test.SetRSOwnerRef(pod)
|
||||
}),
|
||||
},
|
||||
expectedPodsEvicted: 0,
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range testCases {
|
||||
t.Run(testCase.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range testCase.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
for _, pod := range testCase.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
podsForEviction := make(map[string]struct{})
|
||||
for _, pod := range testCase.evictedPods {
|
||||
podsForEviction[pod] = struct{}{}
|
||||
}
|
||||
|
||||
evictionFailed := false
|
||||
if len(testCase.evictedPods) > 0 {
|
||||
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
getAction := action.(core.CreateAction)
|
||||
obj := getAction.GetObject()
|
||||
if eviction, ok := obj.(*v1beta1.Eviction); ok {
|
||||
if _, exists := podsForEviction[eviction.Name]; exists {
|
||||
return true, obj, nil
|
||||
}
|
||||
evictionFailed = true
|
||||
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
|
||||
}
|
||||
return true, obj, nil
|
||||
})
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
//fakeClient := &fake.Clientset{}
|
||||
//fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
|
||||
// list := action.(core.ListAction)
|
||||
// fieldString := list.GetListRestrictions().Fields.String()
|
||||
// if strings.Contains(fieldString, n1NodeName) {
|
||||
// return true, test.pods[n1NodeName], nil
|
||||
// }
|
||||
// if strings.Contains(fieldString, n2NodeName) {
|
||||
// return true, test.pods[n2NodeName], nil
|
||||
// }
|
||||
// if strings.Contains(fieldString, n3NodeName) {
|
||||
// return true, test.pods[n3NodeName], nil
|
||||
// }
|
||||
// return true, nil, fmt.Errorf("Failed to list: %v", list)
|
||||
//})
|
||||
//fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
|
||||
// getAction := action.(core.GetAction)
|
||||
// if node, exists := testCase.nodes[getAction.GetName()]; exists {
|
||||
// return true, node, nil
|
||||
// }
|
||||
// return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
|
||||
//})
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
"v1",
|
||||
false,
|
||||
nil,
|
||||
nil,
|
||||
testCase.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
|
||||
strategy := api.DeschedulerStrategy{
|
||||
Enabled: true,
|
||||
Params: &api.StrategyParameters{
|
||||
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
|
||||
Thresholds: testCase.thresholds,
|
||||
},
|
||||
NodeFit: true,
|
||||
},
|
||||
}
|
||||
HighNodeUtilization(ctx, fakeClient, strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)
|
||||
|
||||
podsEvicted := podEvictor.TotalEvicted()
|
||||
if testCase.expectedPodsEvicted != podsEvicted {
|
||||
t.Errorf("Expected %v pods to be evicted but %v got evicted", testCase.expectedPodsEvicted, podsEvicted)
|
||||
}
|
||||
if evictionFailed {
|
||||
t.Errorf("Pod evictions failed unexpectedly")
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestValidateHighNodeUtilizationStrategyConfig(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
thresholds api.ResourceThresholds
|
||||
targetThresholds api.ResourceThresholds
|
||||
errInfo error
|
||||
}{
|
||||
{
|
||||
name: "passing target thresholds",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 20,
|
||||
v1.ResourceMemory: 20,
|
||||
},
|
||||
targetThresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 80,
|
||||
v1.ResourceMemory: 80,
|
||||
},
|
||||
errInfo: fmt.Errorf("targetThresholds is not applicable for HighNodeUtilization"),
|
||||
},
|
||||
{
|
||||
name: "passing empty thresholds",
|
||||
thresholds: api.ResourceThresholds{},
|
||||
errInfo: fmt.Errorf("thresholds config is not valid: no resource threshold is configured"),
|
||||
},
|
||||
{
|
||||
name: "passing invalid thresholds",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 80,
|
||||
v1.ResourceMemory: 120,
|
||||
},
|
||||
errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
|
||||
"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
|
||||
},
|
||||
{
|
||||
name: "passing valid strategy config",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 80,
|
||||
v1.ResourceMemory: 80,
|
||||
},
|
||||
errInfo: nil,
|
||||
},
|
||||
{
|
||||
name: "passing valid strategy config with extended resource",
|
||||
thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 80,
|
||||
v1.ResourceMemory: 80,
|
||||
extendedResource: 80,
|
||||
},
|
||||
errInfo: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, testCase := range tests {
|
||||
validateErr := validateHighUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds)
|
||||
|
||||
if validateErr == nil || testCase.errInfo == nil {
|
||||
if validateErr != testCase.errInfo {
|
||||
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
|
||||
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
|
||||
}
|
||||
} else if validateErr.Error() != testCase.errInfo.Error() {
|
||||
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
|
||||
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestHighNodeUtilizationWithTaints(t *testing.T) {
|
||||
strategy := api.DeschedulerStrategy{
|
||||
Enabled: true,
|
||||
Params: &api.StrategyParameters{
|
||||
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
|
||||
Thresholds: api.ResourceThresholds{
|
||||
v1.ResourceCPU: 40,
|
||||
},
|
||||
},
|
||||
},
|
||||
}
|
||||
|
||||
n1 := test.BuildTestNode("n1", 1000, 3000, 10, nil)
|
||||
n2 := test.BuildTestNode("n2", 1000, 3000, 10, nil)
|
||||
n3 := test.BuildTestNode("n3", 1000, 3000, 10, nil)
|
||||
n3withTaints := n3.DeepCopy()
|
||||
n3withTaints.Spec.Taints = []v1.Taint{
|
||||
{
|
||||
Key: "key",
|
||||
Value: "value",
|
||||
Effect: v1.TaintEffectNoSchedule,
|
||||
},
|
||||
}
|
||||
|
||||
podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name, test.SetRSOwnerRef)
|
||||
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
|
||||
{
|
||||
Key: "key",
|
||||
Value: "value",
|
||||
},
|
||||
}
|
||||
|
||||
tests := []struct {
|
||||
name string
|
||||
nodes []*v1.Node
|
||||
pods []*v1.Pod
|
||||
evictionsExpected uint
|
||||
}{
|
||||
{
|
||||
name: "No taints",
|
||||
nodes: []*v1.Node{n1, n2, n3},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
|
||||
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
|
||||
// Node 2 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n2.Name), 200, 0, n2.Name, test.SetRSOwnerRef),
|
||||
},
|
||||
evictionsExpected: 1,
|
||||
},
|
||||
{
|
||||
name: "No pod tolerates node taint",
|
||||
nodes: []*v1.Node{n1, n3withTaints},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
|
||||
// Node 3 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, test.SetRSOwnerRef),
|
||||
},
|
||||
evictionsExpected: 0,
|
||||
},
|
||||
{
|
||||
name: "Pod which tolerates node taint",
|
||||
nodes: []*v1.Node{n1, n3withTaints},
|
||||
pods: []*v1.Pod{
|
||||
//Node 1 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 100, 0, n1.Name, test.SetRSOwnerRef),
|
||||
podThatToleratesTaint,
|
||||
// Node 3 pods
|
||||
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 500, 0, n3withTaints.Name, test.SetRSOwnerRef),
|
||||
},
|
||||
evictionsExpected: 1,
|
||||
},
|
||||
}
|
||||
|
||||
for _, item := range tests {
|
||||
t.Run(item.name, func(t *testing.T) {
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
defer cancel()
|
||||
|
||||
var objs []runtime.Object
|
||||
for _, node := range item.nodes {
|
||||
objs = append(objs, node)
|
||||
}
|
||||
|
||||
for _, pod := range item.pods {
|
||||
objs = append(objs, pod)
|
||||
}
|
||||
|
||||
fakeClient := fake.NewSimpleClientset(objs...)
|
||||
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
|
||||
podInformer := sharedInformerFactory.Core().V1().Pods()
|
||||
|
||||
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
|
||||
if err != nil {
|
||||
t.Errorf("Build get pods assigned to node function error: %v", err)
|
||||
}
|
||||
|
||||
sharedInformerFactory.Start(ctx.Done())
|
||||
sharedInformerFactory.WaitForCacheSync(ctx.Done())
|
||||
|
||||
podEvictor := evictions.NewPodEvictor(
|
||||
fakeClient,
|
||||
"policy/v1",
|
||||
false,
|
||||
&item.evictionsExpected,
|
||||
nil,
|
||||
item.nodes,
|
||||
getPodsAssignedToNode,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
false,
|
||||
)
|
||||
|
||||
HighNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor, getPodsAssignedToNode)
|
||||
|
||||
if item.evictionsExpected != podEvictor.TotalEvicted() {
|
||||
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, podEvictor.TotalEvicted())
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||