1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Compare commits

..

1 Commits

Author SHA1 Message Date
Mike Dame
012482b9b3 (TEST) release helm chart on master/release branches 2020-07-22 13:53:24 -04:00
4883 changed files with 214822 additions and 798658 deletions

6
.github/ci/ct.yaml vendored
View File

@@ -1,6 +0,0 @@
chart-dirs:
- charts
helm-extra-args: "--timeout=5m"
check-version-increment: false
helm-extra-set-args: "--set=kind=Deployment"
target-branch: master

View File

@@ -1,67 +0,0 @@
name: Helm
on:
push:
branches:
- master
- release-*
paths:
- 'charts/**'
- '.github/workflows/helm.yaml'
pull_request:
paths:
- 'charts/**'
- '.github/workflows/helm.yaml'
jobs:
lint-and-test:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Helm
uses: azure/setup-helm@v2.1
with:
version: v3.9.2
- uses: actions/setup-python@v3.1.2
with:
python-version: 3.7
- uses: actions/setup-go@v3
with:
go-version: '1.19.0'
- name: Set up chart-testing
uses: helm/chart-testing-action@v2.2.1
with:
version: v3.7.0
- name: Run chart-testing (list-changed)
id: list-changed
run: |
changed=$(ct list-changed --config=.github/ci/ct.yaml)
if [[ -n "$changed" ]]; then
echo "::set-output name=changed::true"
fi
- name: Run chart-testing (lint)
run: ct lint --config=.github/ci/ct.yaml --validate-maintainers=false
# Need a multi node cluster so descheduler runs until evictions
- name: Create multi node Kind cluster
run: make kind-multi-node
# helm-extra-set-args only available after ct 3.6.0
- name: Run chart-testing (install)
run: ct install --config=.github/ci/ct.yaml
- name: E2E after chart install
env:
KUBERNETES_VERSION: "v1.25.0"
KIND_E2E: true
SKIP_INSTALL: true
run: make test-e2e

View File

@@ -3,6 +3,7 @@ name: Release Charts
on:
push:
branches:
- master
- release-*
jobs:
@@ -11,21 +12,20 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Configure Git
run: |
git config user.name "$GITHUB_ACTOR"
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
- name: Install Helm
uses: azure/setup-helm@v1
with:
version: v3.4.0
- name: Fetch history
run: git fetch --prune --unshallow
- name: Add dependency chart repos
run: |
helm repo add stable https://kubernetes-charts.storage.googleapis.com/
- name: Run chart-releaser
uses: helm/chart-releaser-action@v1.1.0
uses: helm/chart-releaser-action@v1.0.0-rc.2
env:
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
CR_RELEASE_NAME_TEMPLATE: "descheduler-helm-chart-{{ .Version }}"

View File

@@ -1,47 +0,0 @@
name: "Security"
on:
push:
branches:
- main
- master
- release-*
schedule:
- cron: '30 1 * * 0'
jobs:
analyze:
name: Analyze
runs-on: ubuntu-latest
permissions:
actions: read
contents: read
security-events: write
strategy:
fail-fast: false
steps:
- name: Checkout
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Build image
run: |
IMAGE_REPO=${HELM_IMAGE_REPO:-descheduler}
IMAGE_TAG=${HELM_IMAGE_TAG:-security-test}
VERSION=security-test make image
- name: Run Trivy vulnerability scanner
uses: aquasecurity/trivy-action@master
with:
image-ref: 'descheduler:security-test'
format: 'sarif'
exit-code: '0'
severity: 'CRITICAL,HIGH'
output: 'trivy-results.sarif'
- name: Upload Trivy scan results to GitHub Security tab
uses: github/codeql-action/upload-sarif@v2
with:
sarif_file: 'trivy-results.sarif'
exit-code: '0'

3
.gitignore vendored
View File

@@ -3,6 +3,3 @@ _tmp/
vendordiff.patch
.idea/
*.code-workspace
.vscode/
kind
bin/

View File

@@ -11,19 +11,15 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM golang:1.19.0
FROM golang:1.14.4
WORKDIR /go/src/sigs.k8s.io/descheduler
COPY . .
ARG ARCH
ARG VERSION
RUN VERSION=${VERSION} make build.$ARCH
RUN make
FROM scratch
MAINTAINER Kubernetes SIG Scheduling <kubernetes-sig-scheduling@googlegroups.com>
USER 1000
MAINTAINER Avesh Agarwal <avesh.ncsu@gmail.com>
COPY --from=0 /go/src/sigs.k8s.io/descheduler/_output/bin/descheduler /bin/descheduler

View File

@@ -13,9 +13,7 @@
# limitations under the License.
FROM scratch
MAINTAINER Kubernetes SIG Scheduling <kubernetes-sig-scheduling@googlegroups.com>
USER 1000
MAINTAINER Avesh Agarwal <avesh.ncsu@gmail.com>
COPY _output/bin/descheduler /bin/descheduler

View File

@@ -14,20 +14,16 @@
.PHONY: test
export CONTAINER_ENGINE ?= docker
# VERSION is based on a date stamp plus the last commit
VERSION?=v$(shell date +%Y%m%d)-$(shell git describe --tags)
BRANCH?=$(shell git branch --show-current)
SHA1?=$(shell git rev-parse HEAD)
# VERSION is currently based on the last commit
VERSION?=$(shell git describe --tags)
COMMIT=$(shell git rev-parse HEAD)
BUILD=$(shell date +%FT%T%z)
LDFLAG_LOCATION=sigs.k8s.io/descheduler/pkg/version
ARCHS = amd64 arm arm64
LDFLAG_LOCATION=sigs.k8s.io/descheduler/cmd/descheduler/app
LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitbranch=${BRANCH} -X ${LDFLAG_LOCATION}.gitsha1=${SHA1}"
LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitCommit=${COMMIT}"
GOLANGCI_VERSION := v1.49.0
HAS_GOLANGCI := $(shell ls _output/bin/golangci-lint 2> /dev/null)
GOLANGCI_VERSION := v1.15.0
HAS_GOLANGCI := $(shell ls _output/bin/golangci-lint)
# REGISTRY is the container registry to push
# into. The default is to push to the staging
@@ -45,65 +41,30 @@ IMAGE_GCLOUD:=$(REGISTRY)/descheduler:$(VERSION)
# In the future binaries can be uploaded to
# GCS bucket gs://k8s-staging-descheduler.
HAS_HELM := $(shell which helm 2> /dev/null)
HAS_HELM := $(shell which helm)
all: build
build:
CGO_ENABLED=0 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.amd64:
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.arm:
CGO_ENABLED=0 GOOS=linux GOARCH=arm GOARM=7 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.arm64:
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
dev-image: build
$(CONTAINER_ENGINE) build -f Dockerfile.dev -t $(IMAGE) .
docker build -f Dockerfile.dev -t $(IMAGE) .
image:
$(CONTAINER_ENGINE) build --build-arg VERSION="$(VERSION)" --build-arg ARCH="amd64" -t $(IMAGE) .
docker build -t $(IMAGE) .
image.amd64:
$(CONTAINER_ENGINE) build --build-arg VERSION="$(VERSION)" --build-arg ARCH="amd64" -t $(IMAGE)-amd64 .
image.arm:
$(CONTAINER_ENGINE) build --build-arg VERSION="$(VERSION)" --build-arg ARCH="arm" -t $(IMAGE)-arm .
image.arm64:
$(CONTAINER_ENGINE) build --build-arg VERSION="$(VERSION)" --build-arg ARCH="arm64" -t $(IMAGE)-arm64 .
push: image
push-container-to-gcloud: image
gcloud auth configure-docker
$(CONTAINER_ENGINE) tag $(IMAGE) $(IMAGE_GCLOUD)
$(CONTAINER_ENGINE) push $(IMAGE_GCLOUD)
docker tag $(IMAGE) $(IMAGE_GCLOUD)
docker push $(IMAGE_GCLOUD)
push-all: image.amd64 image.arm image.arm64
gcloud auth configure-docker
for arch in $(ARCHS); do \
$(CONTAINER_ENGINE) tag $(IMAGE)-$${arch} $(IMAGE_GCLOUD)-$${arch} ;\
$(CONTAINER_ENGINE) push $(IMAGE_GCLOUD)-$${arch} ;\
done
DOCKER_CLI_EXPERIMENTAL=enabled $(CONTAINER_ENGINE) manifest create $(IMAGE_GCLOUD) $(addprefix --amend $(IMAGE_GCLOUD)-, $(ARCHS))
for arch in $(ARCHS); do \
DOCKER_CLI_EXPERIMENTAL=enabled $(CONTAINER_ENGINE) manifest annotate --arch $${arch} $(IMAGE_GCLOUD) $(IMAGE_GCLOUD)-$${arch} ;\
done
DOCKER_CLI_EXPERIMENTAL=enabled $(CONTAINER_ENGINE) manifest push $(IMAGE_GCLOUD) ;\
push: push-container-to-gcloud
clean:
rm -rf _output
rm -rf _tmp
verify: verify-govet verify-spelling verify-gofmt verify-vendor lint lint-chart verify-toc verify-gen
verify-govet:
./hack/verify-govet.sh
verify-spelling:
./hack/verify-spelling.sh
verify: verify-gofmt verify-vendor lint lint-chart
verify-gofmt:
./hack/verify-gofmt.sh
@@ -111,9 +72,6 @@ verify-gofmt:
verify-vendor:
./hack/verify-vendor.sh
verify-toc:
./hack/verify-toc.sh
test-unit:
./test/run-unit-tests.sh
@@ -124,37 +82,15 @@ gen:
./hack/update-generated-conversions.sh
./hack/update-generated-deep-copies.sh
./hack/update-generated-defaulters.sh
./hack/update-toc.sh
verify-gen:
./hack/verify-conversions.sh
./hack/verify-deep-copies.sh
./hack/verify-defaulters.sh
lint:
ifndef HAS_GOLANGCI
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b ./_output/bin ${GOLANGCI_VERSION}
curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b ./_output/bin ${GOLANGCI_VERSION}
endif
./_output/bin/golangci-lint run
# helm
ensure-helm-install:
lint-chart:
ifndef HAS_HELM
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 && chmod 700 ./get_helm.sh && ./get_helm.sh
endif
lint-chart: ensure-helm-install
helm lint ./charts/descheduler
build-helm:
helm package ./charts/descheduler --dependency-update --destination ./bin/chart
test-helm: ensure-helm-install
./test/run-helm-tests.sh
kind-multi-node:
kind create cluster --name kind --config ./hack/kind_config.yaml --wait 2m
ct-helm:
./hack/verify-chart.sh

20
OWNERS
View File

@@ -1,16 +1,12 @@
approvers:
- damemi
- ingvagabund
- seanmalloy
- a7i
reviewers:
- damemi
- seanmalloy
- ingvagabund
- lixiang233
- a7i
- janeliul
emeritus_approvers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla
- damemi
reviewers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla
- damemi
- seanmalloy
- ingvagabund

695
README.md
View File

@@ -1,8 +1,9 @@
[![Go Report Card](https://goreportcard.com/badge/sigs.k8s.io/descheduler)](https://goreportcard.com/report/sigs.k8s.io/descheduler)
![Release Charts](https://github.com/kubernetes-sigs/descheduler/workflows/Release%20Charts/badge.svg)
[![Go Report Card](https://goreportcard.com/badge/kubernetes-sigs/descheduler)](https://goreportcard.com/report/sigs.k8s.io/descheduler)
# Descheduler for Kubernetes
## Introduction
Scheduling in Kubernetes is the process of binding pending pods to nodes, and is performed by
a component of Kubernetes called kube-scheduler. The scheduler's decisions, whether or where a
pod can or can not be scheduled, are guided by its configurable policy which comprises of set of
@@ -22,47 +23,9 @@ Descheduler, based on its policy, finds pods that can be moved and evicts them.
note, in current implementation, descheduler does not schedule replacement of evicted pods
but relies on the default scheduler for that.
Table of Contents
=================
<!-- toc -->
- [Quick Start](#quick-start)
- [Run As A Job](#run-as-a-job)
- [Run As A CronJob](#run-as-a-cronjob)
- [Run As A Deployment](#run-as-a-deployment)
- [Install Using Helm](#install-using-helm)
- [Install Using Kustomize](#install-using-kustomize)
- [User Guide](#user-guide)
- [Policy and Strategies](#policy-and-strategies)
- [RemoveDuplicates](#removeduplicates)
- [LowNodeUtilization](#lownodeutilization)
- [HighNodeUtilization](#highnodeutilization)
- [RemovePodsViolatingInterPodAntiAffinity](#removepodsviolatinginterpodantiaffinity)
- [RemovePodsViolatingNodeAffinity](#removepodsviolatingnodeaffinity)
- [RemovePodsViolatingNodeTaints](#removepodsviolatingnodetaints)
- [RemovePodsViolatingTopologySpreadConstraint](#removepodsviolatingtopologyspreadconstraint)
- [RemovePodsHavingTooManyRestarts](#removepodshavingtoomanyrestarts)
- [PodLifeTime](#podlifetime)
- [RemoveFailedPods](#removefailedpods)
- [Filter Pods](#filter-pods)
- [Namespace filtering](#namespace-filtering)
- [Priority filtering](#priority-filtering)
- [Label filtering](#label-filtering)
- [Node Fit filtering](#node-fit-filtering)
- [Pod Evictions](#pod-evictions)
- [Pod Disruption Budget (PDB)](#pod-disruption-budget-pdb)
- [High Availability](#high-availability)
- [Configure HA Mode](#configure-ha-mode)
- [Metrics](#metrics)
- [Compatibility Matrix](#compatibility-matrix)
- [Getting Involved and Contributing](#getting-involved-and-contributing)
- [Communicating With Contributors](#communicating-with-contributors)
- [Roadmap](#roadmap)
- [Code of conduct](#code-of-conduct)
<!-- /toc -->
## Quick Start
The descheduler can be run as a `Job`, `CronJob`, or `Deployment` inside of a k8s cluster. It has the
The descheduler can be run as a Job or CronJob inside of a k8s cluster. It has the
advantage of being able to be run multiple times without needing user intervention.
The descheduler pod is run as a critical pod in the `kube-system` namespace to avoid
being evicted by itself or by the kubelet.
@@ -70,52 +33,17 @@ being evicted by itself or by the kubelet.
### Run As A Job
```
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/job/job.yaml
kubectl create -f kubernetes/rbac.yaml
kubectl create -f kubernetes/configmap.yaml
kubectl create -f kubernetes/job.yaml
```
### Run As A CronJob
```
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/cronjob/cronjob.yaml
```
### Run As A Deployment
```
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/deployment/deployment.yaml
```
### Install Using Helm
Starting with release v0.18.0 there is an official helm chart that can be used to install the
descheduler. See the [helm chart README](https://github.com/kubernetes-sigs/descheduler/blob/master/charts/descheduler/README.md) for detailed instructions.
The descheduler helm chart is also listed on the [artifact hub](https://artifacthub.io/packages/helm/descheduler/descheduler).
### Install Using Kustomize
You can use kustomize to install descheduler.
See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/kustomize/resource/) for detailed instructions.
Run As A Job
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.25.0' | kubectl apply -f -
```
Run As A CronJob
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.25.0' | kubectl apply -f -
```
Run As A Deployment
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.25.0' | kubectl apply -f -
kubectl create -f kubernetes/rbac.yaml
kubectl create -f kubernetes/configmap.yaml
kubectl create -f kubernetes/cronjob.yaml
```
## User Guide
@@ -124,68 +52,25 @@ See the [user guide](docs/user-guide.md) in the `/docs` directory.
## Policy and Strategies
Descheduler's policy is configurable and includes strategies that can be enabled or disabled. By default, all strategies are enabled.
The policy includes a common configuration that applies to all the strategies:
| Name | Default Value | Description |
|------|---------------|-------------|
| `nodeSelector` | `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` | `false` | allows eviction of pods with local storage |
| `evictSystemCriticalPods` | `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
| `ignorePvcPods` | `false` | set whether PVC pods should be evicted or ignored |
| `maxNoOfPodsToEvictPerNode` | `nil` | maximum number of pods evicted from each node (summed through all strategies) |
| `maxNoOfPodsToEvictPerNamespace` | `nil` | maximum number of pods evicted from each namespace (summed through all strategies) |
| `evictFailedBarePods` | `false` | allow eviction of pods without owner references and in failed phase |
As part of the policy, the parameters associated with each strategy can be configured.
See each strategy for details on available parameters.
**Policy:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
nodeSelector: prod=dev
evictFailedBarePods: false
evictLocalStoragePods: true
evictSystemCriticalPods: true
maxNoOfPodsToEvictPerNode: 40
ignorePvcPods: false
strategies:
...
```
The following diagram provides a visualization of most of the strategies to help
categorize how strategies fit together.
![Strategies diagram](strategies_diagram.png)
Descheduler's policy is configurable and includes strategies that can be enabled or disabled.
Seven strategies `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`,
`RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints`, `RemovePodsHavingTooManyRestarts`, and `PodLifeTime`
are currently implemented. As part of the policy, the parameters associated with the strategies can be configured too.
By default, all strategies are enabled.
### RemoveDuplicates
This strategy makes sure that there is only one pod associated with a ReplicaSet (RS),
ReplicationController (RC), StatefulSet, or Job running on the same node. If there are more,
This strategy makes sure that there is only one pod associated with a Replica Set (RS),
Replication Controller (RC), Deployment, or Job running on the same node. If there are more,
those duplicate pods are evicted for better spreading of pods in a cluster. This issue could happen
if some nodes went down due to whatever reasons, and pods on them were moved to other nodes leading to
more than one pod associated with a RS or RC, for example, running on the same node. Once the failed nodes
are ready again, this strategy could be enabled to evict those duplicate pods.
It provides one optional parameter, `excludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction. Note that
pods created by Deployments are considered for eviction by this strategy. The `excludeOwnerKinds` parameter
should include `ReplicaSet` to have pods created by Deployments excluded.
It provides one optional parameter, `ExcludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
**Parameters:**
|Name|Type|
|---|---|
|`excludeOwnerKinds`|list(string)|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -204,49 +89,20 @@ in the hope that recreation of evicted pods will be scheduled on these underutil
parameters of this strategy are configured under `nodeResourceUtilizationThresholds`.
The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage (the percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node).
If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
`thresholds` can be configured for cpu, memory, and number of pods in terms of percentage. If a node's
usage is below threshold for all (cpu, memory, and number of pods), the node is considered underutilized.
Currently, pods request resource requirements are considered for computing node resource utilization.
There is another configurable threshold, `targetThresholds`, that is used to compute those potential nodes
from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, number of pods, or extended resources),
from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, or number of pods),
the node is considered over utilized. Any node between the thresholds, `thresholds` and `targetThresholds` is
considered appropriately utilized and is not considered for eviction. The threshold, `targetThresholds`,
can be configured for cpu, memory, and number of pods too in terms of percentage.
These thresholds, `thresholds` and `targetThresholds`, could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `overutilized nodes` (those with usage above `targetThresholds`) to `underutilized nodes`
(those with usage below `thresholds`), it will abort if any number of `underutilized nodes` or `overutilized nodes` is zero.
These thresholds, `thresholds` and `targetThresholds`, could be tuned as per your cluster requirements.
Here is an example of a policy for this strategy:
Additionally, the strategy accepts a `useDeviationThresholds` parameter.
If that parameter is set to `true`, the thresholds are considered as percentage deviations from mean resource usage.
`thresholds` will be deducted from the mean among all nodes and `targetThresholds` will be added to the mean.
A resource consumption above (resp. below) this window is considered as overutilization (resp. underutilization).
**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage.
This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same
design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands
like `kubectl top`) may differ from the calculated consumption, due to these components reporting
actual usage metrics. Implementing metrics-based descheduling is currently TODO for the project.
**Parameters:**
|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`targetThresholds`|map(string:int)|
|`numberOfNodes`|int|
|`useDeviationThresholds`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -265,110 +121,34 @@ strategies:
```
Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`.
If any of these resource types is not specified, all its thresholds default to 100% to avoid nodes going from underutilized to overutilized.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional,
and will not be used to compute node's usage if it's not specified in `thresholds` and `targetThresholds` explicitly.
* Only three types of resources are supported: `cpu`, `memory` and `pods`.
* `thresholds` or `targetThresholds` can not be nil and they must configure exactly the same types of resources.
* The valid range of the resource's percentage value is \[0, 100\]
* Percentage value of `thresholds` can not be greater than `targetThresholds` for the same resource.
If any of the resource types is not specified, all its thresholds default to 100% to avoid nodes going
from underutilized to overutilized.
There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
are above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
### HighNodeUtilization
This strategy finds nodes that are under utilized and evicts pods from the nodes in the hope that these pods will be
scheduled compactly into fewer nodes. Used in conjunction with node auto-scaling, this strategy is intended to help
trigger down scaling of under utilized nodes.
This strategy **must** be used with the scheduler scoring strategy `MostAllocated`. The parameters of this strategy are
configured under `nodeResourceUtilizationThresholds`.
The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage. The percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node.
If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
Currently, pods request resource requirements are considered for computing node resource utilization.
Any node above `thresholds` is considered appropriately utilized and is not considered for eviction.
The `thresholds` param could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `underutilized nodes` (those with usage below `thresholds`)
so that they can be recreated in appropriately utilized nodes.
The strategy will abort if any number of `underutilized nodes` or `appropriately utilized nodes` is zero.
**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage.
This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same
design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands
like `kubectl top`) may differ from the calculated consumption, due to these components reporting
actual usage metrics. Implementing metrics-based descheduling is currently TODO for the project.
**Parameters:**
|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`numberOfNodes`|int|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"cpu" : 20
"memory": 20
"pods": 20
```
Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`. If any of these resource types is not specified, all its thresholds default to 100%.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional, and will not be used to compute node's usage if it's not specified in `thresholds` explicitly.
* `thresholds` can not be nil.
* The valid range of the resource's percentage value is \[0, 100\]
There is another parameter associated with the `HighNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
is above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
### RemovePodsViolatingInterPodAntiAffinity
This strategy makes sure that pods violating interpod anti-affinity are removed from nodes. For example,
if there is podA on a node and podB and podC (running on the same node) have anti-affinity rules which prohibit
them to run on the same node, then podA will be evicted from the node so that podB and podC could run. This
issue could happen, when the anti-affinity rules for podB and podC are created when they are already running on
node.
node. Currently, there are no parameters associated with this strategy. To disable this strategy, the
policy should look like:
**Parameters:**
|Name|Type|
|---|---|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingInterPodAntiAffinity":
enabled: true
enabled: false
```
### RemovePodsViolatingNodeAffinity
@@ -389,20 +169,9 @@ of scheduling. Over time nodeA stops to satisfy the rule. When the strategy gets
executed and there is another node available that satisfies the node affinity rule,
podA gets evicted from nodeA.
**Parameters:**
The policy file should look like:
|Name|Type|
|---|---|
|`nodeAffinityType`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -418,97 +187,21 @@ strategies:
This strategy makes sure that pods violating NoSchedule taints on nodes are removed. For example there is a
pod "podA" with a toleration to tolerate a taint ``key=value:NoSchedule`` scheduled and running on the tainted
node. If the node's taint is subsequently updated/removed, taint is no longer satisfied by its pods' tolerations
and will be evicted.
and will be evicted. The policy file should look like:
Node taints can be excluded from consideration by specifying a list of excludedTaints. If a node taint key **or**
key=value matches an excludedTaints entry, the taint will be ignored.
For example, excludedTaints entry "dedicated" would match all taints with key "dedicated", regardless of value.
excludedTaints entry "dedicated=special-user" would match taints with key "dedicated" and value "special-user".
**Parameters:**
|Name|Type|
|---|---|
|`excludedTaints`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
````yaml
````
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingNodeTaints":
enabled: true
params:
excludedTaints:
- dedicated=special-user # exclude taints with key "dedicated" and value "special-user"
- reserved # exclude all taints with key "reserved"
````
### RemovePodsViolatingTopologySpreadConstraint
This strategy makes sure that pods violating [topology spread constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/)
are evicted from nodes. Specifically, it tries to evict the minimum number of pods required to balance topology domains to within each constraint's `maxSkew`.
This strategy requires k8s version 1.18 at a minimum.
By default, this strategy only deals with hard constraints, setting parameter `includeSoftConstraints` to `true` will
include soft constraints.
Strategy parameter `labelSelector` is not utilized when balancing topology domains and is only applied during eviction to determine if the pod can be evicted.
**Parameters:**
|Name|Type|
|---|---|
|`includeSoftConstraints`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
includeSoftConstraints: false
```
### RemovePodsHavingTooManyRestarts
This strategy makes sure that pods having too many restarts are removed from nodes. For example a pod with EBS/PD that
can't get the volume/disk attached to the instance, then the pod should be re-scheduled to other nodes. Its parameters
include `podRestartThreshold`, which is the number of restarts (summed over all eligible containers) at which a pod
should be evicted, and `includingInitContainers`, which determines whether init container restarts should be factored
into that calculation.
This strategy makes sure that pods having too many restarts are removed from nodes. For example a pod with EBS/PD that can't get the volume/disk attached to the instance, then the pod should be re-scheduled to other nodes.
**Parameters:**
|Name|Type|
|---|---|
|`podRestartThreshold`|int|
|`includingInitContainers`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -522,276 +215,33 @@ strategies:
### PodLifeTime
This strategy evicts pods that are older than `maxPodLifeTimeSeconds`.
This strategy evicts pods that are older than `.strategies.PodLifeTime.params.maxPodLifeTimeSeconds` The policy
file should look like:
You can also specify `states` parameter to **only** evict pods matching the following conditions:
- [Pod Phase](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#pod-phase) status of: `Running`, `Pending`
- [Container State Waiting](https://kubernetes.io/docs/concepts/workloads/pods/pod-lifecycle/#container-state-waiting) condition of: `PodInitializing`, `ContainerCreating`
If a value for `states` or `podStatusPhases` is not specified,
Pods in any state (even `Running`) are considered for eviction.
**Parameters:**
|Name|Type|Notes|
|---|---|---|
|`maxPodLifeTimeSeconds`|int||
|`podStatusPhases`|list(string)|Deprecated in v0.25+ Use `states` instead|
|`states`|list(string)|Only supported in v0.25+|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))||
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))||
|`namespaces`|(see [namespace filtering](#namespace-filtering))||
|`labelSelector`|(see [label filtering](#label-filtering))||
**Example:**
```yaml
````
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
states:
- "Pending"
- "PodInitializing"
```
### RemoveFailedPods
This strategy evicts pods that are in failed status phase.
You can provide an optional parameter to filter by failed `reasons`.
`reasons` can be expanded to include reasons of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
You can specify an optional parameter `minPodLifetimeSeconds` to evict pods that are older than specified seconds.
Lastly, you can specify the optional parameter `excludeOwnerKinds` and if a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
**Parameters:**
|Name|Type|
|---|---|
|`minPodLifetimeSeconds`|uint|
|`excludeOwnerKinds`|list(string)|
|`reasons`|list(string)|
|`includingInitContainers`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemoveFailedPods":
enabled: true
params:
failedPods:
reasons:
- "NodeAffinity"
includingInitContainers: true
excludeOwnerKinds:
- "Job"
minPodLifetimeSeconds: 3600
```
## Filter Pods
### Namespace filtering
The following strategies accept a `namespaces` parameter which allows to specify a list of including, resp. excluding namespaces:
* `PodLifeTime`
* `RemovePodsHavingTooManyRestarts`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemoveDuplicates`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemoveFailedPods`
For example:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
namespaces:
include:
- "namespace1"
- "namespace2"
```
In the examples `PodLifeTime` gets executed only over `namespace1` and `namespace2`.
The similar holds for `exclude` field:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
namespaces:
exclude:
- "namespace1"
- "namespace2"
```
The strategy gets executed over all namespaces but `namespace1` and `namespace2`.
It's not allowed to compute `include` with `exclude` field.
### Priority filtering
All strategies are able to configure a priority threshold, only pods under the threshold can be evicted. You can
specify this threshold by setting `thresholdPriorityClassName`(setting the threshold to the value of the given
priority class) or `thresholdPriority`(directly setting the threshold) parameters. By default, this threshold
is set to the value of `system-cluster-critical` priority class.
Note: Setting `evictSystemCriticalPods` to true disables priority filtering entirely.
E.g.
Setting `thresholdPriority`
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
thresholdPriority: 10000
```
Setting `thresholdPriorityClassName`
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
thresholdPriorityClassName: "priorityclass1"
```
Note that you can't configure both `thresholdPriority` and `thresholdPriorityClassName`, if the given priority class
does not exist, descheduler won't create it and will throw an error.
### Label filtering
The following strategies can configure a [standard kubernetes labelSelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.25/#labelselector-v1-meta)
to filter pods by their labels:
* `PodLifeTime`
* `RemovePodsHavingTooManyRestarts`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemoveFailedPods`
This allows running strategies among pods the descheduler is interested in.
For example:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
labelSelector:
matchLabels:
component: redis
matchExpressions:
- {key: tier, operator: In, values: [cache]}
- {key: environment, operator: NotIn, values: [dev]}
```
### Node Fit filtering
The following strategies accept a `nodeFit` boolean parameter which can optimize descheduling:
* `RemoveDuplicates`
* `LowNodeUtilization`
* `HighNodeUtilization`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemovePodsHavingTooManyRestarts`
* `RemoveFailedPods`
If set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. If a pod cannot be rescheduled to another node, it will not be evicted. Currently the following criteria are considered when setting `nodeFit` to `true`:
- A `nodeSelector` on the pod
- Any `tolerations` on the pod and any `taints` on the other nodes
- `nodeAffinity` on the pod
- Resource `requests` made by the pod and the resources available on other nodes
- Whether any of the other nodes are marked as `unschedulable`
E.g.
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeFit: true
nodeResourceUtilizationThresholds:
thresholds:
"cpu": 20
"memory": 20
"pods": 20
targetThresholds:
"cpu": 50
"memory": 50
"pods": 50
```
Note that node fit filtering references the current pod spec, and not that of it's owner.
Thus, if the pod is owned by a ReplicationController (and that ReplicationController was modified recently),
the pod may be running with an outdated spec, which the descheduler will reference when determining node fit.
This is expected behavior as the descheduler is a "best-effort" mechanism.
Using Deployments instead of ReplicationControllers provides an automated rollout of pod spec changes, therefore ensuring that the descheduler has an up-to-date view of the cluster state.
````
## Pod Evictions
When the descheduler decides to evict pods from a node, it employs the following general mechanism:
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted (unless `evictSystemCriticalPods: true` is set).
* Pods (static or mirrored pods or standalone pods) not part of an ReplicationController, ReplicaSet(Deployment), StatefulSet, or Job are
never evicted because these pods won't be recreated. (Standalone pods in failed status phase can be evicted by setting `evictFailedBarePods: true`)
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted.
* Pods (static or mirrored pods or stand alone pods) not part of an RC, RS, Deployment or Job are
never evicted because these pods won't be recreated.
* Pods associated with DaemonSets are never evicted.
* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set).
* Pods with PVCs are evicted (unless `ignorePvcPods: true` is set).
* Pods with local storage are never evicted.
* In `LowNodeUtilization` and `RemovePodsViolatingInterPodAntiAffinity`, pods are evicted by their priority from low to high, and if they have same priority,
best effort pods are evicted before burstable and guaranteed pods.
* All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This
* All types of pods with the annotation descheduler.alpha.kubernetes.io/evict are evicted. This
annotation is used to override checks which prevent eviction and users can select which pod is evicted.
Users should know how and if the pod will be recreated.
* Pods with a non-nil DeletionTimestamp are not evicted by default.
Setting `--v=4` or greater on the Descheduler will log all reasons why any pod is not evictable.
@@ -800,33 +250,6 @@ Setting `--v=4` or greater on the Descheduler will log all reasons why any pod i
Pods subject to a Pod Disruption Budget(PDB) are not evicted if descheduling violates its PDB. The pods
are evicted by using the eviction subresource to handle PDB.
## High Availability
In High Availability mode, Descheduler starts [leader election](https://github.com/kubernetes/client-go/tree/master/tools/leaderelection) process in Kubernetes. You can activate HA mode
if you choose to deploy your application as Deployment.
Deployment starts with 1 replica by default. If you want to use more than 1 replica, you must consider
enable High Availability mode since we don't want to run descheduler pods simultaneously.
### Configure HA Mode
The leader election process can be enabled by setting `--leader-elect` in the CLI. You can also set
`--set=leaderElection.enabled=true` flag if you are using Helm.
To get best results from HA mode some additional configurations might require:
* Configure a [podAntiAffinity](https://kubernetes.io/docs/concepts/scheduling-eviction/assign-pod-node) rule if you want to schedule onto a node only if that node is in the same zone as at least one already-running descheduler
* Set the replica count greater than 1
## Metrics
| name | type | description |
|-------|-------|----------------|
| build_info | gauge | constant 1 |
| pods_evicted | CounterVec | total number of pods evicted |
The metrics are served through https://localhost:10258/metrics by default.
The address and port can be changed by setting `--binding-address` and `--secure-port` flags.
## Compatibility Matrix
The below compatibility matrix shows the k8s client package(client-go, apimachinery, etc) versions that descheduler
is compiled with. At this time descheduler does not have a hard dependency to a specific k8s release. However a
@@ -836,19 +259,13 @@ v0.18 should work with k8s v1.18, v1.17, and v1.16.
Starting with descheduler release v0.18 the minor version of descheduler matches the minor version of the k8s client
packages that it is compiled with.
| Descheduler | Supported Kubernetes Version |
|-------------|------------------------------|
| v0.25 | v1.25 |
| v0.24 | v1.24 |
| v0.23 | v1.23 |
| v0.22 | v1.22 |
| v0.21 | v1.21 |
| v0.20 | v1.20 |
| v0.19 | v1.19 |
| v0.18 | v1.18 |
| v0.10 | v1.17 |
| v0.4-v0.9 | v1.9+ |
| v0.1-v0.3 | v1.7-v1.8 |
Descheduler | Supported Kubernetes Version
-------------|-----------------------------
v0.18 | v1.18
v0.10 | v1.17
v0.4-v0.9 | v1.9+
v0.1-v0.3 | v1.7-v1.8
## Getting Involved and Contributing

View File

@@ -1,7 +1,7 @@
apiVersion: v1
name: descheduler
version: 0.24.1
appVersion: 0.24.1
version: 0.18.0
appVersion: 0.18.0
description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that.
keywords:
- kubernetes
@@ -12,5 +12,5 @@ icon: https://kubernetes.io/images/favicon.png
sources:
- https://github.com/kubernetes-sigs/descheduler
maintainers:
- name: Kubernetes SIG Scheduling
email: kubernetes-sig-scheduling@googlegroups.com
- name: stevehipwell
email: steve.hipwell@github.com

View File

@@ -6,12 +6,12 @@
```shell
helm repo add descheduler https://kubernetes-sigs.github.io/descheduler/
helm install my-release --namespace kube-system descheduler/descheduler
$ helm install descheduler/descheduler --name my-release
```
## Introduction
This chart bootstraps a [descheduler](https://github.com/kubernetes-sigs/descheduler/) cron job on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
This chart bootstraps a [desheduler](https://github.com/kubernetes-sigs/descheduler/) cron job on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
## Prerequisites
@@ -22,7 +22,7 @@ This chart bootstraps a [descheduler](https://github.com/kubernetes-sigs/desched
To install the chart with the release name `my-release`:
```shell
helm install --namespace kube-system my-release descheduler/descheduler
helm install --name my-release descheduler/descheduler
```
The command deploys _descheduler_ on the Kubernetes cluster in the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation.
@@ -43,45 +43,17 @@ The command removes all the Kubernetes components associated with the chart and
The following table lists the configurable parameters of the _descheduler_ chart and their default values.
| Parameter | Description | Default |
|-------------------------------------|-----------------------------------------------------------------------------------------------------------------------|--------------------------------------|
| `kind` | Use as CronJob or Deployment | `CronJob` |
| `image.repository` | Docker repository to use | `k8s.gcr.io/descheduler/descheduler` |
| `image.tag` | Docker tag to use | `v[chart appVersion]` |
| `image.pullPolicy` | Docker image pull policy | `IfNotPresent` |
| `imagePullSecrets` | Docker repository secrets | `[]` |
| `nameOverride` | String to partially override `descheduler.fullname` template (will prepend the release name) | `""` |
| `fullnameOverride` | String to fully override `descheduler.fullname` template | `""` |
| `cronJobApiVersion` | CronJob API Group Version | `"batch/v1"` |
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
| `startingDeadlineSeconds` | If set, configure `startingDeadlineSeconds` for the _descheduler_ job | `nil` |
| `successfulJobsHistoryLimit` | If set, configure `successfulJobsHistoryLimit` for the _descheduler_ job | `nil` |
| `failedJobsHistoryLimit` | If set, configure `failedJobsHistoryLimit` for the _descheduler_ job | `nil` |
| `deschedulingInterval` | If using kind:Deployment, sets time between consecutive descheduler executions. | `5m` |
| `replicas` | The replica count for Deployment | `1` |
| `leaderElection` | The options for high availability when running replicated components | _see values.yaml_ |
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
| `rbac.create` | If `true`, create & use RBAC resources | `true` |
| `resources` | Descheduler container CPU and memory requests/limits | _see values.yaml_ |
| `serviceAccount.create` | If `true`, create a service account for the cron job | `true` |
| `serviceAccount.name` | The name of the service account to use, if not set and create is true a name is generated using the fullname template | `nil` |
| `serviceAccount.annotations` | Specifies custom annotations for the serviceAccount | `{}` |
| `podAnnotations` | Annotations to add to the descheduler Pods | `{}` |
| `podLabels` | Labels to add to the descheduler Pods | `{}` |
| `nodeSelector` | Node selectors to run the descheduler cronjob/deployment on specific nodes | `nil` |
| `service.enabled` | If `true`, create a service for deployment | `false` |
| `serviceMonitor.enabled` | If `true`, create a ServiceMonitor for deployment | `false` |
| `serviceMonitor.namespace` | The namespace where Prometheus expects to find service monitors | `nil` |
| `serviceMonitor.interval` | The scrape interval. If not set, the Prometheus default scrape interval is used | `nil` |
| `serviceMonitor.honorLabels` | Keeps the scraped data's labels when labels are on collisions with target labels. | `true` |
| `serviceMonitor.insecureSkipVerify` | Skip TLS certificate validation when scraping | `true` |
| `serviceMonitor.serverName` | Name of the server to use when validating TLS certificate | `nil` |
| `serviceMonitor.metricRelabelings` | MetricRelabelConfigs to apply to samples after scraping, but before ingestion | `[]` |
| `serviceMonitor.relabelings` | RelabelConfigs to apply to samples before scraping | `[]` |
| `affinity` | Node affinity to run the descheduler cronjob/deployment on specific nodes | `nil` |
| `tolerations` | tolerations to run the descheduler cronjob/deployment on specific nodes | `nil` |
| `suspend` | Set spec.suspend in descheduler cronjob | `false` |
| `commonLabels` | Labels to apply to all resources | `{}` |
| `livenessProbe` | Liveness probe configuration for the descheduler container | _see values.yaml_ |
| Parameter | Description | Default |
| ------------------------------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------ |
| `image.repository` | Docker repository to use | `us.gcr.io/k8s-artifacts-prod/descheduler/descheduler` |
| `image.tag` | Docker tag to use | `v[chart appVersion]` |
| `image.pullPolicy` | Docker image pull policy | `IfNotPresent` |
| `nameOverride` | String to partially override `descheduler.fullname` template (will prepend the release name) | `""` |
| `fullnameOverride` | String to fully override `descheduler.fullname` template | `""` |
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
| `rbac.create` | If `true`, create & use RBAC resources | `true` |
| `serviceAccount.create` | If `true`, create a service account for the cron job | `true` |
| `serviceAccount.name` | The name of the service account to use, if not set and create is true a name is generated using the fullname template | `nil` |

View File

@@ -1,7 +1 @@
Descheduler installed as a {{ .Values.kind }}.
{{- if eq .Values.kind "Deployment" }}
{{- if eq .Values.replicas 1.0}}
WARNING: You set replica count as 1 and workload kind as Deployment however leaderElection is not enabled. Consider enabling Leader Election for HA mode.
{{- end}}
{{- end}}
Descheduler installed as a cron job.

View File

@@ -42,17 +42,6 @@ app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- if .Values.commonLabels}}
{{ toYaml .Values.commonLabels }}
{{- end }}
{{- end -}}
{{/*
Selector labels
*/}}
{{- define "descheduler.selectorLabels" -}}
app.kubernetes.io/name: {{ include "descheduler.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
{{/*
@@ -65,30 +54,3 @@ Create the name of the service account to use
{{ default "default" .Values.serviceAccount.name }}
{{- end -}}
{{- end -}}
{{/*
Leader Election
*/}}
{{- define "descheduler.leaderElection"}}
{{- if .Values.leaderElection -}}
- --leader-elect={{ .Values.leaderElection.enabled }}
{{- if .Values.leaderElection.leaseDuration }}
- --leader-elect-lease-duration={{ .Values.leaderElection.leaseDuration }}
{{- end }}
{{- if .Values.leaderElection.renewDeadline }}
- --leader-elect-renew-deadline={{ .Values.leaderElection.renewDeadline }}
{{- end }}
{{- if .Values.leaderElection.retryPeriod }}
- --leader-elect-retry-period={{ .Values.leaderElection.retryPeriod }}
{{- end }}
{{- if .Values.leaderElection.resourceLock }}
- --leader-elect-resource-lock={{ .Values.leaderElection.resourceLock }}
{{- end }}
{{- if .Values.leaderElection.resourceName }}
- --leader-elect-resource-name={{ .Values.leaderElection.resourceName }}
{{- end }}
{{- if .Values.leaderElection.resourceNamescape }}
- --leader-elect-resource-namespace={{ .Values.leaderElection.resourceNamescape }}
{{- end -}}
{{- end }}
{{- end }}

View File

@@ -12,25 +12,10 @@ rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
{{- if .Values.leaderElection.enabled }}
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create", "update"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
resourceNames: ["{{ .Values.leaderElection.resourceName | default "descheduler" }}"]
verbs: ["get", "patch", "delete"]
{{- end }}
{{- end -}}

View File

@@ -2,7 +2,6 @@ apiVersion: v1
kind: ConfigMap
metadata:
name: {{ template "descheduler.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
data:

View File

@@ -1,26 +1,12 @@
{{- if eq .Values.kind "CronJob" }}
apiVersion: {{ .Values.cronJobApiVersion | default "batch/v1" }}
apiVersion: batch/v1beta1
kind: CronJob
metadata:
name: {{ template "descheduler.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
schedule: {{ .Values.schedule | quote }}
{{- if .Values.suspend }}
suspend: {{ .Values.suspend }}
{{- end }}
concurrencyPolicy: "Forbid"
{{- if .Values.startingDeadlineSeconds }}
startingDeadlineSeconds: {{ .Values.startingDeadlineSeconds }}
{{- end }}
{{- if .Values.successfulJobsHistoryLimit }}
successfulJobsHistoryLimit: {{ .Values.successfulJobsHistoryLimit }}
{{- end }}
{{- if .Values.failedJobsHistoryLimit }}
failedJobsHistoryLimit: {{ .Values.failedJobsHistoryLimit }}
{{- end }}
jobTemplate:
spec:
template:
@@ -32,32 +18,17 @@ spec:
{{- .Values.podAnnotations | toYaml | nindent 12 }}
{{- end }}
labels:
{{- include "descheduler.selectorLabels" . | nindent 12 }}
app.kubernetes.io/name: {{ include "descheduler.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- if .Values.podLabels }}
{{- .Values.podLabels | toYaml | nindent 12 }}
{{- end }}
spec:
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
serviceAccountName: {{ template "descheduler.serviceAccountName" . }}
restartPolicy: "Never"
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 10 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
@@ -73,18 +44,6 @@ spec:
- {{ $value | quote }}
{{- end }}
{{- end }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 16 }}
resources:
{{- toYaml .Values.resources | nindent 16 }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
@@ -92,4 +51,3 @@ spec:
- name: policy-volume
configMap:
name: {{ template "descheduler.fullname" . }}
{{- end }}

View File

@@ -1,94 +0,0 @@
{{- if eq .Values.kind "Deployment" }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "descheduler.fullname" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
{{- if gt .Values.replicas 1.0}}
{{- if not .Values.leaderElection.enabled }}
{{- fail "You must set leaderElection to use more than 1 replica"}}
{{- end}}
replicas: {{ required "leaderElection required for running more than one replica" .Values.replicas }}
{{- else }}
replicas: 1
{{- end }}
selector:
matchLabels:
{{- include "descheduler.selectorLabels" . | nindent 6 }}
template:
metadata:
labels:
{{- include "descheduler.selectorLabels" . | nindent 8 }}
{{- if .Values.podLabels }}
{{- .Values.podLabels | toYaml | nindent 8 }}
{{- end }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
{{- if .Values.podAnnotations }}
{{- .Values.podAnnotations | toYaml | nindent 8 }}
{{- end }}
spec:
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
serviceAccountName: {{ template "descheduler.serviceAccountName" . }}
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 10 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--descheduling-interval"
- {{ required "deschedulingInterval required for running as Deployment" .Values.deschedulingInterval }}
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key | quote }}
{{- if $value }}
- {{ $value | quote }}
{{- end }}
{{- end }}
{{- include "descheduler.leaderElection" . | nindent 12 }}
ports:
- containerPort: 10258
protocol: TCP
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
volumes:
- name: policy-volume
configMap:
name: {{ template "descheduler.fullname" . }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@@ -1,21 +0,0 @@
{{- if eq .Values.kind "Deployment" }}
{{- if eq .Values.service.enabled true }}
apiVersion: v1
kind: Service
metadata:
labels:
{{- include "descheduler.labels" . | nindent 4 }}
name: {{ template "descheduler.fullname" . }}
namespace: {{ .Release.Namespace }}
spec:
clusterIP: None
ports:
- name: http-metrics
port: 10258
protocol: TCP
targetPort: 10258
selector:
{{- include "descheduler.selectorLabels" . | nindent 4 }}
type: ClusterIP
{{- end }}
{{- end }}

View File

@@ -3,10 +3,6 @@ apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ template "descheduler.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
{{- if .Values.serviceAccount.annotations }}
annotations: {{ toYaml .Values.serviceAccount.annotations | nindent 4 }}
{{- end }}
{{- end -}}

View File

@@ -1,41 +0,0 @@
{{- if eq .Values.kind "Deployment" }}
{{- if eq .Values.serviceMonitor.enabled true }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
name: {{ template "descheduler.fullname" . }}-servicemonitor
namespace: {{ .Values.serviceMonitor.namespace | default .Release.Namespace }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
jobLabel: jobLabel
namespaceSelector:
matchNames:
- {{ .Release.Namespace }}
selector:
matchLabels:
{{- include "descheduler.selectorLabels" . | nindent 6 }}
endpoints:
- honorLabels: {{ .Values.serviceMonitor.honorLabels | default true }}
port: http-metrics
{{- if .Values.serviceMonitor.interval }}
interval: {{ .Values.serviceMonitor.interval }}
{{- end }}
scheme: https
tlsConfig:
{{- if eq .Values.serviceMonitor.insecureSkipVerify true }}
insecureSkipVerify: true
{{- end }}
{{- if .Values.serviceMonitor.serverName }}
serverName: {{ .Values.serviceMonitor.serverName }}
{{- end}}
{{- if .Values.serviceMonitor.metricRelabelings }}
metricRelabelings:
{{ tpl (toYaml .Values.serviceMonitor.metricRelabelings | indent 4) . }}
{{- end }}
{{- if .Values.serviceMonitor.relabelings }}
relabelings:
{{ tpl (toYaml .Values.serviceMonitor.relabelings | indent 4) . }}
{{- end }}
{{- end }}
{{- end }}

View File

@@ -2,90 +2,36 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# CronJob or Deployment
kind: CronJob
image:
repository: k8s.gcr.io/descheduler/descheduler
repository: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler
# Overrides the image tag whose default is the chart version
tag: ""
pullPolicy: IfNotPresent
imagePullSecrets:
# - name: container-registry-secret
resources:
requests:
cpu: 500m
memory: 256Mi
# limits:
# cpu: 100m
# memory: 128Mi
nameOverride: ""
fullnameOverride: ""
# labels that'll be applied to all resources
commonLabels: {}
cronJobApiVersion: "batch/v1"
schedule: "*/2 * * * *"
suspend: false
# startingDeadlineSeconds: 200
# successfulJobsHistoryLimit: 1
# failedJobsHistoryLimit: 1
# Required when running as a Deployment
deschedulingInterval: 5m
# Specifies the replica count for Deployment
# Set leaderElection if you want to use more than 1 replica
# Set affinity.podAntiAffinity rule if you want to schedule onto a node
# only if that node is in the same zone as at least one already-running descheduler
replicas: 1
# Specifies whether Leader Election resources should be created
# Required when running as a Deployment
leaderElection: {}
# enabled: true
# leaseDuration: 15s
# renewDeadline: 10s
# retryPeriod: 2s
# resourceLock: "leases"
# resourceName: "descheduler"
# resourceNamescape: "kube-system"
cmdOptions:
v: 3
# evict-local-storage-pods:
# max-pods-to-evict-per-node: 10
# node-selector: "key1=value1,key2=value2"
deschedulerPolicy:
# nodeSelector: "key1=value1,key2=value2"
# maxNoOfPodsToEvictPerNode: 10
# maxNoOfPodsToEvictPerNamespace: 10
# ignorePvcPods: true
# evictLocalStoragePods: true
strategies:
RemoveDuplicates:
enabled: true
RemovePodsHavingTooManyRestarts:
enabled: true
params:
podsHavingTooManyRestarts:
podRestartThreshold: 100
includingInitContainers: true
RemovePodsViolatingNodeTaints:
enabled: true
RemovePodsViolatingNodeAffinity:
enabled: true
params:
nodeAffinityType:
- requiredDuringSchedulingIgnoredDuringExecution
nodeAffinityType:
- requiredDuringSchedulingIgnoredDuringExecution
RemovePodsViolatingInterPodAntiAffinity:
enabled: true
RemovePodsViolatingTopologySpreadConstraint:
enabled: true
params:
includeSoftConstraints: false
LowNodeUtilization:
enabled: true
params:
@@ -101,34 +47,6 @@ deschedulerPolicy:
priorityClassName: system-cluster-critical
nodeSelector: {}
# foo: bar
affinity: {}
# nodeAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# - key: kubernetes.io/e2e-az-name
# operator: In
# values:
# - e2e-az1
# - e2e-az2
# podAntiAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# - labelSelector:
# matchExpressions:
# - key: app.kubernetes.io/name
# operator: In
# values:
# - descheduler
# topologyKey: "kubernetes.io/hostname"
tolerations: []
# - key: 'management'
# operator: 'Equal'
# value: 'tool'
# effect: 'NoSchedule'
rbac:
# Specifies whether RBAC resources should be created
create: true
@@ -139,41 +57,3 @@ serviceAccount:
# The name of the ServiceAccount to use.
# If not set and create is true, a name is generated using the fullname template
name:
# Specifies custom annotations for the serviceAccount
annotations: {}
podAnnotations: {}
podLabels: {}
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 3
periodSeconds: 10
service:
enabled: false
serviceMonitor:
enabled: false
# The namespace where Prometheus expects to find service monitors.
# namespace: ""
interval: ""
# honorLabels: true
insecureSkipVerify: true
serverName: null
metricRelabelings: []
# - action: keep
# regex: 'descheduler_(build_info|pods_evicted)'
# sourceLabels: [__name__]
relabelings: []
# - sourceLabels: [__meta_kubernetes_pod_node_name]
# separator: ;
# regex: ^(.*)$
# targetLabel: nodename
# replacement: $1
# action: replace

View File

@@ -1,20 +1,20 @@
# See https://cloud.google.com/cloud-build/docs/build-config
# this must be specified in seconds. If omitted, defaults to 600s (10 mins)
timeout: 1500s
timeout: 1200s
# this prevents errors if you don't use both _GIT_TAG and _PULL_BASE_REF,
# or any new substitutions added in the future.
options:
substitution_option: ALLOW_LOOSE
steps:
- name: 'gcr.io/k8s-staging-test-infra/gcb-docker-gcloud:v20211118-2f2d816b90'
- name: 'gcr.io/k8s-testimages/gcb-docker-gcloud:v20190906-745fed4'
entrypoint: make
env:
- DOCKER_CLI_EXPERIMENTAL=enabled
- VERSION=$_GIT_TAG
- BASE_REF=$_PULL_BASE_REF
args:
- push-all
- push
substitutions:
# _GIT_TAG will be filled with a git-based tag for the image, of the form vYYYYMMDD-hash, and
# can be used as a substitution

View File

@@ -18,79 +18,44 @@ limitations under the License.
package options
import (
"time"
"github.com/spf13/pflag"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
apiserveroptions "k8s.io/apiserver/pkg/server/options"
clientset "k8s.io/client-go/kubernetes"
componentbaseconfig "k8s.io/component-base/config"
componentbaseoptions "k8s.io/component-base/config/options"
// install the componentconfig api so we get its defaulting and conversion functions
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
deschedulerscheme "sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
const (
DefaultDeschedulerPort = 10258
"github.com/spf13/pflag"
)
// DeschedulerServer configuration
type DeschedulerServer struct {
componentconfig.DeschedulerConfiguration
Client clientset.Interface
EventClient clientset.Interface
SecureServing *apiserveroptions.SecureServingOptionsWithLoopback
DisableMetrics bool
Client clientset.Interface
}
// NewDeschedulerServer creates a new DeschedulerServer with default parameters
func NewDeschedulerServer() (*DeschedulerServer, error) {
cfg, err := newDefaultComponentConfig()
if err != nil {
return nil, err
}
secureServing := apiserveroptions.NewSecureServingOptions().WithLoopback()
secureServing.BindPort = DefaultDeschedulerPort
return &DeschedulerServer{
DeschedulerConfiguration: *cfg,
SecureServing: secureServing,
}, nil
}
func newDefaultComponentConfig() (*componentconfig.DeschedulerConfiguration, error) {
versionedCfg := v1alpha1.DeschedulerConfiguration{
LeaderElection: componentbaseconfig.LeaderElectionConfiguration{
LeaderElect: false,
LeaseDuration: metav1.Duration{Duration: 137 * time.Second},
RenewDeadline: metav1.Duration{Duration: 107 * time.Second},
RetryPeriod: metav1.Duration{Duration: 26 * time.Second},
ResourceLock: "leases",
ResourceName: "descheduler",
ResourceNamespace: "kube-system",
},
}
deschedulerscheme.Scheme.Default(&versionedCfg)
func NewDeschedulerServer() *DeschedulerServer {
versioned := v1alpha1.DeschedulerConfiguration{}
deschedulerscheme.Scheme.Default(&versioned)
cfg := componentconfig.DeschedulerConfiguration{}
if err := deschedulerscheme.Scheme.Convert(&versionedCfg, &cfg, nil); err != nil {
return nil, err
deschedulerscheme.Scheme.Convert(versioned, &cfg, nil)
s := DeschedulerServer{
DeschedulerConfiguration: cfg,
}
return &cfg, nil
return &s
}
// AddFlags adds flags for a specific SchedulerServer to the specified FlagSet
func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&rs.Logging.Format, "logging-format", "text", `Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log_dir, --log_file, --log_file_max_size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning.`)
fs.DurationVar(&rs.DeschedulingInterval, "descheduling-interval", rs.DeschedulingInterval, "Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.")
fs.StringVar(&rs.KubeconfigFile, "kubeconfig", rs.KubeconfigFile, "File with kube configuration.")
fs.StringVar(&rs.PolicyConfigFile, "policy-config-file", rs.PolicyConfigFile, "File with descheduler policy configuration.")
fs.BoolVar(&rs.DryRun, "dry-run", rs.DryRun, "execute descheduler in dry run mode.")
fs.BoolVar(&rs.DisableMetrics, "disable-metrics", rs.DisableMetrics, "Disables metrics. The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.")
componentbaseoptions.BindLeaderElectionFlags(&rs.LeaderElection, fs)
rs.SecureServing.AddFlags(fs)
// node-selector query causes descheduler to run only on nodes that matches the node labels in the query
fs.StringVar(&rs.NodeSelector, "node-selector", rs.NodeSelector, "Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)")
// max-no-pods-to-evict limits the maximum number of pods to be evicted per node by descheduler.
fs.IntVar(&rs.MaxNoOfPodsToEvictPerNode, "max-pods-to-evict-per-node", rs.MaxNoOfPodsToEvictPerNode, "Limits the maximum number of pods to be evicted per node by descheduler")
// evict-local-storage-pods allows eviction of pods that are using local storage. This is false by default.
fs.BoolVar(&rs.EvictLocalStoragePods, "evict-local-storage-pods", rs.EvictLocalStoragePods, "Enables evicting pods using local storage by descheduler")
}

View File

@@ -18,92 +18,44 @@ limitations under the License.
package app
import (
"context"
"flag"
"io"
"os/signal"
"syscall"
"k8s.io/apiserver/pkg/server/healthz"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/descheduler"
"github.com/spf13/cobra"
apiserver "k8s.io/apiserver/pkg/server"
"k8s.io/apiserver/pkg/server/mux"
restclient "k8s.io/client-go/rest"
registry "k8s.io/component-base/logs/api/v1"
_ "k8s.io/component-base/logs/json/register"
"k8s.io/component-base/metrics/legacyregistry"
aflag "k8s.io/component-base/cli/flag"
"k8s.io/component-base/logs"
"k8s.io/klog/v2"
)
// NewDeschedulerCommand creates a *cobra.Command object with default parameters
func NewDeschedulerCommand(out io.Writer) *cobra.Command {
s, err := options.NewDeschedulerServer()
if err != nil {
klog.ErrorS(err, "unable to initialize server")
}
s := options.NewDeschedulerServer()
cmd := &cobra.Command{
Use: "descheduler",
Short: "descheduler",
Long: `The descheduler evicts pods which may be bound to less desired nodes`,
Run: func(cmd *cobra.Command, args []string) {
// s.Logs.Config.Format = s.Logging.Format
// LoopbackClientConfig is a config for a privileged loopback connection
var LoopbackClientConfig *restclient.Config
var SecureServing *apiserver.SecureServingInfo
if err := s.SecureServing.ApplyTo(&SecureServing, &LoopbackClientConfig); err != nil {
klog.ErrorS(err, "failed to apply secure server configuration")
return
}
var factory registry.LogFormatFactory
if factory == nil {
klog.ClearLogger()
} else {
log, logrFlush := factory.Create(registry.LoggingConfiguration{
Format: s.Logging.Format,
})
defer logrFlush()
klog.SetLogger(log)
}
ctx, done := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
pathRecorderMux := mux.NewPathRecorderMux("descheduler")
if !s.DisableMetrics {
pathRecorderMux.Handle("/metrics", legacyregistry.HandlerWithReset())
}
healthz.InstallHandler(pathRecorderMux, healthz.NamedCheck("Descheduler", healthz.PingHealthz.Check))
stoppedCh, _, err := SecureServing.Serve(pathRecorderMux, 0, ctx.Done())
logs.InitLogs()
defer logs.FlushLogs()
err := Run(s)
if err != nil {
klog.Fatalf("failed to start secure server: %v", err)
return
klog.Errorf("%v", err)
}
err = Run(ctx, s)
if err != nil {
klog.ErrorS(err, "descheduler server")
}
done()
// wait for metrics server to close
<-stoppedCh
},
}
cmd.SetOut(out)
cmd.SetOutput(out)
flags := cmd.Flags()
flags.SetNormalizeFunc(aflag.WordSepNormalizeFunc)
flags.AddGoFlagSet(flag.CommandLine)
s.AddFlags(flags)
return cmd
}
func Run(ctx context.Context, rs *options.DeschedulerServer) error {
return descheduler.Run(ctx, rs)
func Run(rs *options.DeschedulerServer) error {
return descheduler.Run(rs)
}

View File

@@ -18,19 +18,70 @@ package app
import (
"fmt"
"runtime"
"strings"
"github.com/spf13/cobra"
"sigs.k8s.io/descheduler/pkg/version"
)
var (
// gitCommit is a constant representing the source version that
// generated this build. It should be set during build via -ldflags.
gitCommit string
// version is a constant representing the version tag that
// generated this build. It should be set during build via -ldflags.
version string
// buildDate in ISO8601 format, output of $(date -u +'%Y-%m-%dT%H:%M:%SZ')
//It should be set during build via -ldflags.
buildDate string
)
// Info holds the information related to descheduler app version.
type Info struct {
Major string `json:"major"`
Minor string `json:"minor"`
GitCommit string `json:"gitCommit"`
GitVersion string `json:"gitVersion"`
BuildDate string `json:"buildDate"`
GoVersion string `json:"goVersion"`
Compiler string `json:"compiler"`
Platform string `json:"platform"`
}
// Get returns the overall codebase version. It's for detecting
// what code a binary was built from.
func Get() Info {
majorVersion, minorVersion := splitVersion(version)
return Info{
Major: majorVersion,
Minor: minorVersion,
GitCommit: gitCommit,
GitVersion: version,
BuildDate: buildDate,
GoVersion: runtime.Version(),
Compiler: runtime.Compiler,
Platform: fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH),
}
}
func NewVersionCommand() *cobra.Command {
var versionCmd = &cobra.Command{
Use: "version",
Short: "Version of descheduler",
Long: `Prints the version of descheduler.`,
Run: func(cmd *cobra.Command, args []string) {
fmt.Printf("Descheduler version %+v\n", version.Get())
fmt.Printf("Descheduler version %+v\n", Get())
},
}
return versionCmd
}
// splitVersion splits the git version to generate major and minor versions needed.
func splitVersion(version string) (string, string) {
if version == "" {
return "", ""
}
// A sample version would be of form v0.1.0-7-ge884046, so split at first '.' and
// then return 0 and 1+(+ appended to follow semver convention) for major and minor versions.
return strings.Trim(strings.Split(version, ".")[0], "v"), strings.Split(version, ".")[1] + "+"
}

View File

@@ -17,23 +17,20 @@ limitations under the License.
package main
import (
"flag"
"fmt"
"os"
"k8s.io/component-base/cli"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/cmd/descheduler/app"
)
func init() {
klog.SetOutput(os.Stdout)
klog.InitFlags(nil)
}
func main() {
out := os.Stdout
cmd := app.NewDeschedulerCommand(out)
cmd.AddCommand(app.NewVersionCommand())
code := cli.Run(cmd)
os.Exit(code)
flag.CommandLine.Parse([]string{})
if err := cmd.Execute(); err != nil {
fmt.Println(err)
os.Exit(1)
}
}

View File

@@ -3,10 +3,10 @@
## Required Tools
- [Git](https://git-scm.com/downloads)
- [Go 1.16+](https://golang.org/dl/)
- [Go 1.14+](https://golang.org/dl/)
- [Docker](https://docs.docker.com/install/)
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl)
- [kind v0.10.0+](https://kind.sigs.k8s.io/)
- [kind](https://kind.sigs.k8s.io/)
## Build and Run
@@ -31,40 +31,12 @@ View all CLI options.
## Run Tests
```
GOOS=linux make dev-image
make kind-multi-node
kind create cluster --config hack/kind_config.yaml
kind load docker-image <image name>
kind get kubeconfig > /tmp/admin.conf
export KUBECONFIG=/tmp/admin.conf
make test-unit
make test-e2e
```
## Build Helm Package locally
If you made some changes in the chart, and just want to check if templating is ok, or if the chart is buildable, you can run this command to have a package built from the `./charts` directory.
```
make build-helm
```
## Lint Helm Chart locally
To check linting of your changes in the helm chart locally you can run:
```
make lint-helm
```
## Test helm changes locally with kind and ct
You will need kind and docker (or equivalent) installed. We can use ct public image to avoid installing ct and all its dependencies.
```
make kind-multi-node
make ct-helm
```
### Miscellaneous
See the [hack directory](https://github.com/kubernetes-sigs/descheduler/tree/master/hack) for additional tools and scripts used for developing the descheduler.
See the [hack directory](https://github.com/kubernetes-sigs/descheduler/tree/master/hack) for additional tools and scripts used for developing the descheduler.

View File

@@ -1,16 +0,0 @@
# Proposals
This document walk you through about all the enhancements proposals for descheduler.
## Descheduler v1alpha2 Design Proposal
```yaml
title: Descheduler v1alpha2 Design Proposal
authors:
- "@damemi"
link:
- https://docs.google.com/document/d/1S1JCh-0F-QCJvBBG-kbmXiHAJFF8doArhDIAKbOj93I/edit#heading=h.imbp1ctnc8lx
- https://github.com/kubernetes-sigs/descheduler/issues/679
owning-sig: sig-scheduling
creation-date: 2021-05-01
status: implementable
```

View File

@@ -1,83 +1,32 @@
# Release Guide
The process for publishing each Descheduler release includes a mixture of manual and automatic steps. Over
time, it would be good to automate as much of this process as possible. However, due to current limitations there
is care that must be taken to perform each manual step precisely so that the automated steps execute properly.
## Container Image
## Pre-release Code Changes
### Semi-automatic
Before publishing each release, the following code updates must be made:
1. Make sure your repo is clean by git's standards
2. Create a release branch `git checkout -b release-1.18` (not required for patch releases)
3. Push the release branch to the descheuler repo and ensure branch protection is enabled (not required for patch releases)
4. Tag the repository and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
5. Publish a draft release using the tag you just created
6. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/master/k8s.gcr.io#image-promoter)
7. Publish release
8. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
- [ ] (Optional, but recommended) Bump `k8s.io` dependencies to the `-rc` tags. These tags are usually published around upstream code freeze. [Example](https://github.com/kubernetes-sigs/descheduler/pull/539)
- [ ] Bump `k8s.io` dependencies to GA tags once they are published (following the upstream release). [Example](https://github.com/kubernetes-sigs/descheduler/pull/615)
- [ ] Ensure that Go is updated to the same version as upstream. [Example](https://github.com/kubernetes-sigs/descheduler/pull/801)
- [ ] Make CI changes in [github.com/kubernetes/test-infra](https://github.com/kubernetes/test-infra) to add the new version's tests (note, this may also include a Go bump). [Example](https://github.com/kubernetes/test-infra/pull/25833)
- [ ] Update local CI versions for utils (such as golang-ci), kind, and go. [Example - e2e](https://github.com/kubernetes-sigs/descheduler/commit/ac4d576df8831c0c399ee8fff1e85469e90b8c44), [Example - helm](https://github.com/kubernetes-sigs/descheduler/pull/821)
- [ ] Update version references in docs and Readme. [Example](https://github.com/kubernetes-sigs/descheduler/pull/617)
### Manual
## Release Process
When the above pre-release steps are complete and the release is ready to be cut, perform the following steps **in order**
(the flowchart below demonstrates these steps):
**Version release**
1. Create the `git tag` on `master` for the release, eg `v0.24.0`
2. Merge Helm chart version update to `master` (see [Helm chart](#helm-chart) below). [Example](https://github.com/kubernetes-sigs/descheduler/pull/709)
3. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/main/k8s.gcr.io#image-promoter). [Example](https://github.com/kubernetes/k8s.io/pull/3344)
4. Cut release branch from `master`, eg `release-1.24`
5. Publish release using Github's release process from the git tag you created
6. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
**Patch release**
1. Pick relevant code change commits to the matching release branch, eg `release-1.24`
2. Create the patch tag on the release branch, eg `v0.24.1` on `release-1.24`
3. Merge Helm chart version update to release branch
4. Perform the image promotion process for the patch version
5. Publish release using Github's release process from the git tag you created
6. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
### Flowchart
![Flowchart for major and patch releases](release-process.png)
### Image promotion process
Every merge to any branch triggers an [image build and push](https://github.com/kubernetes/test-infra/blob/c36b8e5/config/jobs/image-pushing/k8s-staging-descheduler.yaml) to a `gcr.io` repository.
These automated image builds are snapshots of the code in place at the time of every PR merge and
tagged with the latest git SHA at the time of the build. To create a final release image, the desired
auto-built image SHA is added to a [file upstream](https://github.com/kubernetes/k8s.io/blob/e9e971c/k8s.gcr.io/images/k8s-staging-descheduler/images.yaml) which
copies that image to a public registry.
Automatic builds can be monitored and re-triggered with the [`post-descheduler-push-images` job](https://prow.k8s.io/?job=post-descheduler-push-images) on prow.k8s.io.
Note that images can also be manually built and pushed using `VERSION=$VERSION make push-all` by [users with access](https://github.com/kubernetes/k8s.io/blob/fbee8f67b70304241e613a672c625ad972998ad7/groups/sig-scheduling/groups.yaml#L33-L43).
## Helm Chart
We currently use the [chart-releaser-action GitHub Action](https://github.com/helm/chart-releaser-action) to automatically
publish [Helm chart releases](https://github.com/kubernetes-sigs/descheduler/blob/022e07c/.github/workflows/release.yaml).
This action is triggered when it detects any changes to [`Chart.yaml`](https://github.com/kubernetes-sigs/descheduler/blob/022e07c27853fade6d1304adc0a6ebe02642386c/charts/descheduler/Chart.yaml) on
a `release-*` branch.
Helm chart releases are managed by a separate set of git tags that are prefixed with `descheduler-helm-chart-*`. Example git tag name is `descheduler-helm-chart-0.18.0`.
Released versions of the helm charts are stored in the `gh-pages` branch of this repo.
The major and minor version of the chart matches the descheduler major and minor versions. For example descheduler helm chart version helm-descheduler-chart-0.18.0 corresponds
to descheduler version v0.18.0. The patch version of the descheduler helm chart and the patcher version of the descheduler will not necessarily match. The patch
version of the descheduler helm chart is used to version changes specific to the helm chart.
1. Merge all helm chart changes into the master branch before the release is tagged/cut
1. Ensure that `appVersion` in file `charts/descheduler/Chart.yaml` matches the descheduler version(no `v` prefix)
2. Ensure that `version` in file `charts/descheduler/Chart.yaml` has been incremented. This is the chart version.
2. Make sure your repo is clean by git's standards
3. Follow the release-branch or patch release tagging pattern from the above section.
4. Verify the new helm artifact has been successfully pushed to the `gh-pages` branch
## Notes
The Helm releaser-action compares the changes in the action-triggering branch to the latest tag on that branch, so if you tag before creating the new branch there
will be nothing to compare and it will fail. This is why it's necessary to tag, eg, `v0.24.0` *before* making the changes to the
Helm chart version, so that there is a new diff for the action to find. (Tagging *after* making the Helm chart changes would
also work, but then the code that gets built into the promoted image will be tagged as `descheduler-helm-chart-xxx` rather than `v0.xx.0`).
1. Make sure your repo is clean by git's standards
2. Create a release branch `git checkout -b release-1.18` (not required for patch releases)
3. Push the release branch to the descheuler repo and ensure branch protection is enabled (not required for patch releases)
4. Tag the repository and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
5. Checkout the tag you just created and make sure your repo is clean by git's standards `git checkout $VERSION`
6. Build and push the container image to the staging registry `VERSION=$VERSION make push`
7. Publish a draft release using the tag you just created
8. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/master/k8s.gcr.io#image-promoter)
9. Publish release
10. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
### Notes
See [post-descheduler-push-images dashboard](https://testgrid.k8s.io/sig-scheduling#post-descheduler-push-images) for staging registry image build job status.
View the descheduler staging registry using [this URL](https://console.cloud.google.com/gcr/images/k8s-staging-descheduler/GLOBAL/descheduler) in a web browser
@@ -102,3 +51,19 @@ Pull image from the staging registry.
```
docker pull gcr.io/k8s-staging-descheduler/descheduler:v20200206-0.9.0-94-ge2a23f284
```
## Helm Chart
Helm chart releases are managed by a separate set of git tags that are prefixed with `chart-*`. Example git tag name is `chart-0.18.0`. Released versions of the
helm charts are stored in the `gh-pages` branch of this repo. The [chart-releaser-action GitHub Action](https://github.com/helm/chart-releaser-action) is setup to
build and push the helm charts to the `gh-pages` branch when a `chart-*` git tag is created.
The major and minor version of the chart matches the descheduler major and minor versions. For example descheduler helm chart version chart-0.18.0 corresponds
to descheduler version v0.18.0. The patch version of the descheduler helm chart and the patcher version of the descheduler will not necessarily match. The patch
version of the descheduler helm chart is used to version changes specific to the helm chart.
1. Merge all helm chart changes into the appropriate release branch(i.e. release-1.18)
1. Ensure that `appVersion` in file `charts/descheduler/Chart.yaml` matches the descheduler version(no `v` prefix)
2. Ensure that `version` in file `charts/descheduler/Chart.yaml` has been incremented. This is the chart version.
2. Make sure your repo is clean by git's standards
3. Create the tag and push it `git checkout release-1.18; CHART_VERSION=chart-0.18.0; git tag $CHART_VERSION; git push origin $CHART_VERSION`
4. Verify the new helm artifact has been successfully pushed to the `gh-pages` branch

Binary file not shown.

Before

Width:  |  Height:  |  Size: 121 KiB

View File

@@ -1,28 +1,9 @@
# User Guide
Starting with descheduler release v0.10.0 container images are available in the official k8s container registry.
Descheduler Version | Container Image | Architectures |
------------------- |--------------------------------------------|-------------------------|
v0.25.0 | k8s.gcr.io/descheduler/descheduler:v0.25.0 | AMD64<br>ARM64<br>ARMv7 |
v0.24.1 | k8s.gcr.io/descheduler/descheduler:v0.24.1 | AMD64<br>ARM64<br>ARMv7 |
v0.24.0 | k8s.gcr.io/descheduler/descheduler:v0.24.0 | AMD64<br>ARM64<br>ARMv7 |
v0.23.1 | k8s.gcr.io/descheduler/descheduler:v0.23.1 | AMD64<br>ARM64<br>ARMv7 |
v0.22.0 | k8s.gcr.io/descheduler/descheduler:v0.22.0 | AMD64<br>ARM64<br>ARMv7 |
v0.21.0 | k8s.gcr.io/descheduler/descheduler:v0.21.0 | AMD64<br>ARM64<br>ARMv7 |
v0.20.0 | k8s.gcr.io/descheduler/descheduler:v0.20.0 | AMD64<br>ARM64 |
v0.19.0 | k8s.gcr.io/descheduler/descheduler:v0.19.0 | AMD64 |
v0.18.0 | k8s.gcr.io/descheduler/descheduler:v0.18.0 | AMD64 |
v0.10.0 | k8s.gcr.io/descheduler/descheduler:v0.10.0 | AMD64 |
Note that multi-arch container images cannot be pulled by [kind](https://kind.sigs.k8s.io) from a registry. Therefore
starting with descheduler release v0.20.0 use the below process to download the official descheduler
image into a kind cluster.
```
kind create cluster
docker pull k8s.gcr.io/descheduler/descheduler:v0.20.0
kind load docker-image k8s.gcr.io/descheduler/descheduler:v0.20.0
```
Starting with descheduler release v0.10.0 container images are available in these container registries.
* `asia.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
* `eu.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
* `us.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
## Policy Configuration Examples
The [examples](https://github.com/kubernetes-sigs/descheduler/tree/master/examples) directory has descheduler policy configuration examples.
@@ -38,52 +19,31 @@ Usage:
descheduler [command]
Available Commands:
completion generate the autocompletion script for the specified shell
help Help about any command
version Version of descheduler
Flags:
--add-dir-header If true, adds the file directory to the header of the log messages (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--alsologtostderr log to standard error as well as files (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--bind-address ip The IP address on which to listen for the --secure-port port. The associated interface(s) must be reachable by the rest of the cluster, and by CLI/web clients. If blank or an unspecified address (0.0.0.0 or ::), all interfaces will be used. (default 0.0.0.0)
--cert-dir string The directory where the TLS certs are located. If --tls-cert-file and --tls-private-key-file are provided, this flag will be ignored. (default "apiserver.local.config/certificates")
--descheduling-interval duration Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.
--disable-metrics Disables metrics. The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.
--dry-run execute descheduler in dry run mode.
-h, --help help for descheduler
--http2-max-streams-per-connection int The limit that the server gives to clients for the maximum number of streams in an HTTP/2 connection. Zero means to use golang's default.
--kubeconfig string File with kube configuration.
--leader-elect Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.
--leader-elect-lease-duration duration The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. (default 15s)
--leader-elect-renew-deadline duration The interval between attempts by the acting master to renew a leadership slot before it stops leading. This must be less than or equal to the lease duration. This is only applicable if leader election is enabled. (default 10s)
--leader-elect-resource-lock string The type of resource object that is used for locking during leader election. Supported options are 'endpoints', 'configmaps', 'leases', 'endpointsleases' and 'configmapsleases'. (default "leases")
--leader-elect-resource-name string The name of resource object that is used for locking during leader election. (default "descheduler")
--leader-elect-resource-namespace string The namespace of resource object that is used for locking during leader election. (default "kube-system")
--leader-elect-retry-period duration The duration the clients should wait between attempting acquisition and renewal of a leadership. This is only applicable if leader election is enabled. (default 2s)
--log-backtrace-at traceLocation when logging hits line file:N, emit a stack trace (default :0) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--log_dir string If non-empty, write log files in this directory (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--log_file string If non-empty, use this log file (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--log_file_max_size uint Defines the maximum size a log file can grow to. Unit is megabytes. If the value is 0, the maximum file size is unlimited. (default 1800) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--log-flush-frequency duration Maximum number of seconds between log flushes (default 5s)
--logging-format string Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log_dir, --log_file, --log_file_max_size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning. (default "text")
--logtostderr log to standard error instead of files (default true) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--one-output If true, only write logs to their native severity level (vs also writing to each lower severity level) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--permit-address-sharing If true, SO_REUSEADDR will be used when binding the port. This allows binding to wildcard IPs like 0.0.0.0 and specific IPs in parallel, and it avoids waiting for the kernel to release sockets in TIME_WAIT state. [default=false]
--permit-port-sharing If true, SO_REUSEPORT will be used when binding the port, which allows more than one instance to bind on the same address and port. [default=false]
--policy-config-file string File with descheduler policy configuration.
--secure-port int The port on which to serve HTTPS with authentication and authorization. If 0, don't serve HTTPS at all. (default 10258)
--skip-headers If true, avoid header prefixes in the log messages (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--skip-log-headers If true, avoid headers when opening log files (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--stderrthreshold severity logs at or above this threshold go to stderr (default 2) (DEPRECATED: will be removed in a future release, see https://github.com/kubernetes/enhancements/tree/master/keps/sig-instrumentation/2845-deprecate-klog-specific-flags-in-k8s-components)
--tls-cert-file string File containing the default x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If HTTPS serving is enabled, and --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory specified by --cert-dir.
--tls-cipher-suites strings Comma-separated list of cipher suites for the server. If omitted, the default Go cipher suites will be used.
Preferred values: TLS_AES_128_GCM_SHA256, TLS_AES_256_GCM_SHA384, TLS_CHACHA20_POLY1305_SHA256, TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256, TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305, TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256, TLS_RSA_WITH_AES_128_CBC_SHA, TLS_RSA_WITH_AES_128_GCM_SHA256, TLS_RSA_WITH_AES_256_CBC_SHA, TLS_RSA_WITH_AES_256_GCM_SHA384.
Insecure values: TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_ECDSA_WITH_RC4_128_SHA, TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA, TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256, TLS_ECDHE_RSA_WITH_RC4_128_SHA, TLS_RSA_WITH_3DES_EDE_CBC_SHA, TLS_RSA_WITH_AES_128_CBC_SHA256, TLS_RSA_WITH_RC4_128_SHA.
--tls-min-version string Minimum TLS version supported. Possible values: VersionTLS10, VersionTLS11, VersionTLS12, VersionTLS13
--tls-private-key-file string File containing the default x509 private key matching --tls-cert-file.
--tls-sni-cert-key namedCertKey A pair of x509 certificate and private key file paths, optionally suffixed with a list of domain patterns which are fully qualified domain names, possibly with prefixed wildcard segments. The domain patterns also allow IP addresses, but IPs should only be used if the apiserver has visibility to the IP address requested by a client. If no domain patterns are provided, the names of the certificate are extracted. Non-wildcard matches trump over wildcard matches, explicit domain patterns trump over extracted names. For multiple key/certificate pairs, use the --tls-sni-cert-key multiple times. Examples: "example.crt,example.key" or "foo.crt,foo.key:*.foo.com,foo.com". (default [])
-v, --v Level number for the log level verbosity
--vmodule moduleSpec comma-separated list of pattern=N settings for file-filtered logging
--add-dir-header If true, adds the file directory to the header
--alsologtostderr log to standard error as well as files
--descheduling-interval duration Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.
--dry-run execute descheduler in dry run mode.
--evict-local-storage-pods Enables evicting pods using local storage by descheduler
-h, --help help for descheduler
--kubeconfig string File with kube configuration.
--log-backtrace-at traceLocation when logging hits line file:N, emit a stack trace (default :0)
--log-dir string If non-empty, write log files in this directory
--log-file string If non-empty, use this log file
--log-file-max-size uint Defines the maximum size a log file can grow to. Unit is megabytes. If the value is 0, the maximum file size is unlimited. (default 1800)
--log-flush-frequency duration Maximum number of seconds between log flushes (default 5s)
--logtostderr log to standard error instead of files (default true)
--max-pods-to-evict-per-node int Limits the maximum number of pods to be evicted per node by descheduler
--node-selector string Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)
--policy-config-file string File with descheduler policy configuration.
--skip-headers If true, avoid header prefixes in the log messages
--skip-log-headers If true, avoid headers when opening log files
--stderrthreshold severity logs at or above this threshold go to stderr (default 2)
-v, --v Level number for the log level verbosity
--vmodule moduleSpec comma-separated list of pattern=N settings for file-filtered logging
Use "descheduler [command] --help" for more information about a command.
```
@@ -111,65 +71,20 @@ This policy configuration file ensures that pods created more than 7 days ago ar
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: false
"RemoveDuplicates":
enabled: false
"RemovePodsViolatingInterPodAntiAffinity":
enabled: false
"RemovePodsViolatingNodeAffinity":
enabled: false
"RemovePodsViolatingNodeTaints":
enabled: false
"RemovePodsHavingTooManyRestarts":
enabled: false
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 604800 # pods run for a maximum of 7 days
maxPodLifeTimeSeconds: 604800 # pods run for a maximum of 7 days
```
### Balance Cluster By Node Memory Utilization
If your cluster has been running for a long period of time, you may find that the resource utilization is not very
balanced. The following two strategies can be used to rebalance your cluster based on `cpu`, `memory`
or `number of pods`.
#### Balance high utilization nodes
Using `LowNodeUtilization`, descheduler will rebalance the cluster based on memory by evicting pods
from nodes with memory utilization over 70% to nodes with memory utilization below 20%.
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
targetThresholds:
"memory": 70
```
#### Balance low utilization nodes
Using `HighNodeUtilization`, descheduler will rebalance the cluster based on memory by evicting pods
from nodes with memory utilization lower than 20%. This should be use `NodeResourcesFit` with the `MostAllocated` scoring strategy based on these [doc](https://kubernetes.io/docs/reference/scheduling/config/#scheduling-plugins).
The evicted pods will be compacted into minimal set of nodes.
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
```
### Autoheal Node Problems
Descheduler's `RemovePodsViolatingNodeTaints` strategy can be combined with
[Node Problem Detector](https://github.com/kubernetes/node-problem-detector/) and
[Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) to automatically remove
Nodes which have problems. Node Problem Detector can detect specific Node problems and report them to the API server.
There is a feature called TaintNodeByCondition of the node controller that takes some conditions and turns them into taints. Currently, this only works for the default node conditions: PIDPressure, MemoryPressure, DiskPressure, Ready, and some cloud provider specific conditions.
The Descheduler will then deschedule workloads from those Nodes. Finally, if the descheduled Node's resource
allocation falls below the Cluster Autoscaler's scale down threshold, the Node will become a scale down candidate
and can be removed by Cluster Autoscaler. These three components form an autohealing cycle for Node problems.
---
**NOTE**
Once [kubernetes/node-problem-detector#565](https://github.com/kubernetes/node-problem-detector/pull/565) is available in NPD, we need to update this section.
---

View File

@@ -1,14 +0,0 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemoveFailedPods":
enabled: true
params:
failedPods:
reasons:
- "OutOfcpu"
- "CreateContainerConfigError"
includingInitContainers: true
excludeOwnerKinds:
- "Job"
minPodLifetimeSeconds: 3600 # 1 hour

View File

@@ -1,9 +0,0 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20

View File

@@ -1,11 +0,0 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
targetThresholds:
"memory": 70

View File

@@ -1,11 +1,20 @@
---
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: false
"RemoveDuplicates":
enabled: false
"RemovePodsViolatingInterPodAntiAffinity":
enabled: false
"RemovePodsViolatingNodeAffinity":
enabled: false
"RemovePodsViolatingNodeTaints":
enabled: false
"RemovePodsHavingTooManyRestarts":
enabled: false
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 604800 # 7 days
states:
- "Pending"
- "PodInitializing"
maxPodLifeTimeSeconds: 604800 # 7 days

View File

@@ -23,7 +23,3 @@ strategies:
podsHavingTooManyRestarts:
podRestartThreshold: 100
includingInitContainers: true
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
includeSoftConstraints: true

View File

@@ -1,8 +0,0 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
nodeFit: true
includeSoftConstraints: true # Include 'ScheduleAnyways' constraints

118
go.mod
View File

@@ -1,115 +1,15 @@
module sigs.k8s.io/descheduler
go 1.19
go 1.14
require (
github.com/client9/misspell v0.3.4
github.com/spf13/cobra v1.4.0
github.com/spf13/cobra v0.0.5
github.com/spf13/pflag v1.0.5
k8s.io/api v0.25.0
k8s.io/apimachinery v0.25.0
k8s.io/apiserver v0.25.0
k8s.io/client-go v0.25.0
k8s.io/code-generator v0.25.0
k8s.io/component-base v0.25.0
k8s.io/component-helpers v0.25.0
k8s.io/klog/v2 v2.70.1
k8s.io/utils v0.0.0-20220823124924-e9cbc92d1a73
sigs.k8s.io/mdtoc v1.0.1
)
require (
cloud.google.com/go v0.97.0 // indirect
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
github.com/Azure/go-autorest/autorest v0.11.27 // indirect
github.com/Azure/go-autorest/autorest/adal v0.9.20 // indirect
github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect
github.com/Azure/go-autorest/logger v0.2.1 // indirect
github.com/Azure/go-autorest/tracing v0.6.0 // indirect
github.com/BurntSushi/toml v0.3.1 // indirect
github.com/NYTimes/gziphandler v1.1.1 // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
github.com/cespare/xxhash/v2 v2.1.2 // indirect
github.com/coreos/go-semver v0.3.0 // indirect
github.com/coreos/go-systemd/v22 v22.3.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.8.0 // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/felixge/httpsnoop v1.0.1 // indirect
github.com/fsnotify/fsnotify v1.4.9 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/go-logr/zapr v1.2.3 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.5 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang-jwt/jwt/v4 v4.2.0 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/gomarkdown/markdown v0.0.0-20200824053859-8c8b3816f167 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.5.6 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/google/uuid v1.1.2 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
github.com/imdario/mergo v0.3.6 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/mmarkdown/mmark v2.0.40+incompatible // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.12.1 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.32.1 // indirect
github.com/prometheus/procfs v0.7.3 // indirect
go.etcd.io/etcd/api/v3 v3.5.4 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.4 // indirect
go.etcd.io/etcd/client/v3 v3.5.4 // indirect
go.opentelemetry.io/contrib v0.20.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0 // indirect
go.opentelemetry.io/otel v0.20.0 // indirect
go.opentelemetry.io/otel/exporters/otlp v0.20.0 // indirect
go.opentelemetry.io/otel/metric v0.20.0 // indirect
go.opentelemetry.io/otel/sdk v0.20.0 // indirect
go.opentelemetry.io/otel/sdk/export/metric v0.20.0 // indirect
go.opentelemetry.io/otel/sdk/metric v0.20.0 // indirect
go.opentelemetry.io/otel/trace v0.20.0 // indirect
go.opentelemetry.io/proto/otlp v0.7.0 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.6.0 // indirect
go.uber.org/zap v1.19.0 // indirect
golang.org/x/crypto v0.0.0-20220518034528-6f7dac969898 // indirect
golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4 // indirect
golang.org/x/net v0.0.0-20220722155237-a158d28d115b // indirect
golang.org/x/oauth2 v0.0.0-20211104180415-d3ed0bb246c8 // indirect
golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4 // indirect
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
golang.org/x/tools v0.1.12 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20220502173005-c8bf987b8c21 // indirect
google.golang.org/grpc v1.47.0 // indirect
google.golang.org/protobuf v1.28.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/gengo v0.0.0-20211129171323-c02415ce4185 // indirect
k8s.io/kube-openapi v0.0.0-20220803162953-67bda5d908f1 // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.32 // indirect
sigs.k8s.io/json v0.0.0-20220713155537-f223a00ba0e2 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.3 // indirect
sigs.k8s.io/yaml v1.2.0 // indirect
k8s.io/api v0.18.4
k8s.io/apimachinery v0.18.4
k8s.io/apiserver v0.18.4
k8s.io/client-go v0.18.4
k8s.io/code-generator v0.18.4
k8s.io/component-base v0.18.4
k8s.io/klog/v2 v2.0.0
)

946
go.sum

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +0,0 @@
BUILD
CHANGELOG
OWNERS
go.mod
go.sum
vendor/

View File

@@ -18,15 +18,15 @@ E2E_GCE_HOME=$DESCHEDULER_ROOT/hack/e2e-gce
create_cluster() {
echo "#################### Creating instances ##########################"
gcloud compute instances create descheduler-$master_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
gcloud compute instances create descheduler-$master_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
# Keeping the --zone here so as to make sure that e2e's can run locally.
echo "gcloud compute instances delete descheduler-$master_uuid --zone=us-east1-b --quiet" > $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node1_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
gcloud compute instances create descheduler-$node1_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node1_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node2_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-c --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node2_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
# Delete the firewall port created for master.
echo "gcloud compute firewall-rules delete kubeapiserver-$master_uuid --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
@@ -44,10 +44,10 @@ generate_kubeadm_instance_files() {
transfer_install_files() {
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_install.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b
}
@@ -55,19 +55,19 @@ install_kube() {
# Docker installation.
gcloud compute ssh descheduler-$master_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
gcloud compute ssh descheduler-$node1_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-c
gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
# kubeadm installation.
# 1. Transfer files to master, nodes.
transfer_install_files
# 2. Install kubeadm.
#TODO: Add rm /tmp/kubeadm_install.sh
# Open port for kube API server
gcloud compute firewall-rules create kubeapiserver-$master_uuid --allow tcp:6443 --source-tags=descheduler-$master_uuid --source-ranges=0.0.0.0/0 --description="Opening api server port"
gcloud compute firewall-rules create kubeapiserver-$master_uuid --allow tcp:6443 --source-tags=descheduler-$master_uuid --source-ranges=0.0.0.0/0 --description="Opening api server port"
gcloud compute ssh descheduler-$master_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
kubeadm_join_command=$(gcloud compute ssh descheduler-$master_uuid --command "sudo chmod 755 /tmp/kubeadm_install.sh; sudo /tmp/kubeadm_install.sh" --zone=us-east1-b|grep 'kubeadm join')
# Copy the kubeconfig file onto /tmp for e2e tests.
# Copy the kubeconfig file onto /tmp for e2e tests.
gcloud compute ssh descheduler-$master_uuid --command "sudo cp /etc/kubernetes/admin.conf /tmp; sudo chmod 777 /tmp/admin.conf" --zone=us-east1-b
gcloud compute scp descheduler-$master_uuid:/tmp/admin.conf /tmp/admin.conf --zone=us-east1-b
@@ -75,15 +75,16 @@ install_kube() {
gcloud compute ssh descheduler-$master_uuid --command "sudo kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter.yaml --kubeconfig /etc/kubernetes/admin.conf" --zone=us-east1-b
echo $kubeadm_join_command > $E2E_GCE_HOME/kubeadm_join.sh
# Copy kubeadm_join to every node.
# Copy kubeadm_join to every node.
#TODO: Put these in a loop, so that extension becomes possible.
gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-c
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-c
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b
}

View File

@@ -1,18 +1,6 @@
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
apiVersion: kind.sigs.k8s.io/v1alpha3
nodes:
- role: control-plane
- role: worker
kubeadmConfigPatches:
- |
kind: JoinConfiguration
nodeRegistration:
kubeletExtraArgs:
node-labels: "topology.kubernetes.io/zone=local-a"
- role: worker
kubeadmConfigPatches:
- |
kind: JoinConfiguration
nodeRegistration:
kubeletExtraArgs:
node-labels: "topology.kubernetes.io/zone=local-b"

View File

@@ -1,4 +1,3 @@
//go:build tools
// +build tools
/*
@@ -20,8 +19,4 @@ limitations under the License.
// This package imports things required by build scripts, to force `go mod` to see them as dependencies
package tools
import (
_ "github.com/client9/misspell/cmd/misspell"
_ "k8s.io/code-generator"
_ "sigs.k8s.io/mdtoc"
)
import _ "k8s.io/code-generator"

View File

@@ -5,6 +5,6 @@ go build -o "${OS_OUTPUT_BINPATH}/deepcopy-gen" "k8s.io/code-generator/cmd/deepc
${OS_OUTPUT_BINPATH}/deepcopy-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig,${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api,${PRJ_PREFIX}/pkg/api/v1alpha1,${PRJ_PREFIX}/pkg/framework/plugins/defaultevictor/" \
--input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig,${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api,${PRJ_PREFIX}/pkg/api/v1alpha1" \
--output-file-base zz_generated.deepcopy

View File

@@ -23,7 +23,7 @@ DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
GO_VERSION=($(go version))
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.17|go1.18|go1.19') ]]; then
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9|go1.10|go1.11|go1.12|go1.13|go1.14') ]]; then
echo "Unknown go version '${GO_VERSION[2]}', skipping gofmt."
exit 1
fi

View File

@@ -1,25 +0,0 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go build -o "${OS_OUTPUT_BINPATH}/mdtoc" "sigs.k8s.io/mdtoc"
${OS_OUTPUT_BINPATH}/mdtoc --inplace README.md

View File

@@ -1 +0,0 @@
${CONTAINER_ENGINE:-docker} run -it --rm --network host --workdir=/data --volume ~/.kube/config:/root/.kube/config:ro --volume $(pwd):/data quay.io/helmpack/chart-testing:v3.7.0 /bin/bash -c "git config --global --add safe.directory /data; ct install --config=.github/ci/ct.yaml --helm-extra-set-args=\"--set=kind=Deployment\""

View File

@@ -1,36 +0,0 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
mkdir -p "${DESCHEDULER_ROOT}/_tmp"
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/conversion-gen" "k8s.io/code-generator/cmd/conversion-gen"
${OS_OUTPUT_BINPATH}/conversion-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "./pkg/apis/componentconfig/v1alpha1,./pkg/api/v1alpha1" \
--output-file-base zz_generated.conversion
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated output differs:" >&2
echo "${_out}" >&2
echo "Generated conversions verify failed. Please run ./hack/update-generated-conversions.sh"
exit 1
fi
popd > /dev/null 2>&1
echo "Generated conversions verified."

View File

@@ -1,36 +0,0 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
mkdir -p "${DESCHEDULER_ROOT}/_tmp"
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/deepcopy-gen" "k8s.io/code-generator/cmd/deepcopy-gen"
${OS_OUTPUT_BINPATH}/deepcopy-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "./pkg/apis/componentconfig,./pkg/apis/componentconfig/v1alpha1,./pkg/api,./pkg/api/v1alpha1,./pkg/framework/plugins/defaultevictor/" \
--output-file-base zz_generated.deepcopy
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated deep-copies output differs:" >&2
echo "${_out}" >&2
echo "Generated deep-copies verify failed. Please run ./hack/update-generated-deep-copies.sh"
exit 1
fi
popd > /dev/null 2>&1
echo "Generated deep-copies verified."

View File

@@ -1,35 +0,0 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/defaulter-gen" "k8s.io/code-generator/cmd/defaulter-gen"
${OS_OUTPUT_BINPATH}/defaulter-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \
--extra-peer-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \
--output-file-base zz_generated.defaults
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated defaulters output differs:" >&2
echo "${_out}" >&2
echo "Generated defaulters verify failed. Please run ./hack/update-generated-defaulters.sh"
fi
popd > /dev/null 2>&1
echo "Generated Defaulters verified."

View File

@@ -23,7 +23,7 @@ DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
GO_VERSION=($(go version))
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.17|go1.18|go1.19') ]]; then
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9|go1.10|go1.11|go1.12|go1.13|go1.14') ]]; then
echo "Unknown go version '${GO_VERSION[2]}', skipping gofmt."
exit 1
fi

View File

@@ -1,19 +0,0 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go vet ${OS_ROOT}/...

View File

@@ -1,41 +0,0 @@
#!/usr/bin/env bash
# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script checks commonly misspelled English words in all files in the
# working directory by client9/misspell package.
# Usage: `hack/verify-spelling.sh`.
set -o errexit
set -o nounset
set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
export KUBE_ROOT
source "${KUBE_ROOT}/hack/lib/init.sh"
# Ensure that we find the binaries we build before anything else.
export GOBIN="${OS_OUTPUT_BINPATH}"
PATH="${GOBIN}:${PATH}"
# Install tools we need
pushd "${KUBE_ROOT}" >/dev/null
GO111MODULE=on go install github.com/client9/misspell/cmd/misspell
popd >/dev/null
# Spell checking
# All the skipping files are defined in hack/.spelling_failures
skipping_file="${KUBE_ROOT}/hack/.spelling_failures"
failing_packages=$(sed "s| | -e |g" "${skipping_file}")
git ls-files | grep -v -e "${failing_packages}" | xargs misspell -i "Creater,creater,ect" -error -o stderr

View File

@@ -1,29 +0,0 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go build -o "${OS_OUTPUT_BINPATH}/mdtoc" "sigs.k8s.io/mdtoc"
if ! ${OS_OUTPUT_BINPATH}/mdtoc --inplace --dryrun README.md
then
echo "ERROR: Changes detected to table of contents. Run ./hack/update-toc.sh" >&2
exit 1
fi

View File

@@ -1,6 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- configmap.yaml
- rbac.yaml

View File

@@ -1,50 +0,0 @@
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: descheduler-cluster-role
rules:
- apiGroups: ["events.k8s.io"]
resources: ["events"]
verbs: ["create", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
verbs: ["create"]
- apiGroups: ["coordination.k8s.io"]
resources: ["leases"]
resourceNames: ["descheduler"]
verbs: ["get", "patch", "delete"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: descheduler-sa
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: descheduler-cluster-role-binding
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: descheduler-cluster-role
subjects:
- name: descheduler-sa
kind: ServiceAccount
namespace: kube-system

35
kubernetes/cronjob.yaml Normal file
View File

@@ -0,0 +1,35 @@
---
apiVersion: batch/v1beta1
kind: CronJob
metadata:
name: descheduler-cronjob
namespace: kube-system
spec:
schedule: "*/2 * * * *"
concurrencyPolicy: "Forbid"
jobTemplate:
spec:
template:
metadata:
name: descheduler-pod
spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler:v0.18.0
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--v"
- "3"
restartPolicy: "Never"
serviceAccountName: descheduler-sa
volumes:
- name: policy-volume
configMap:
name: descheduler-policy-configmap

View File

@@ -1,55 +0,0 @@
---
apiVersion: batch/v1
kind: CronJob
metadata:
name: descheduler-cronjob
namespace: kube-system
spec:
schedule: "*/2 * * * *"
concurrencyPolicy: "Forbid"
jobTemplate:
spec:
template:
metadata:
name: descheduler-pod
spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: k8s.gcr.io/descheduler/descheduler:v0.25.0
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--v"
- "3"
resources:
requests:
cpu: "500m"
memory: "256Mi"
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 3
periodSeconds: 10
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
restartPolicy: "Never"
serviceAccountName: descheduler-sa
volumes:
- name: policy-volume
configMap:
name: descheduler-policy-configmap

View File

@@ -1,6 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- cronjob.yaml

View File

@@ -1,62 +0,0 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: descheduler
namespace: kube-system
labels:
app: descheduler
spec:
replicas: 1
selector:
matchLabels:
app: descheduler
template:
metadata:
labels:
app: descheduler
spec:
priorityClassName: system-cluster-critical
serviceAccountName: descheduler-sa
containers:
- name: descheduler
image: k8s.gcr.io/descheduler/descheduler:v0.25.0
imagePullPolicy: IfNotPresent
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--descheduling-interval"
- "5m"
- "--v"
- "3"
ports:
- containerPort: 10258
protocol: TCP
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 3
periodSeconds: 10
resources:
requests:
cpu: 500m
memory: 256Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
volumes:
- name: policy-volume
configMap:
name: descheduler-policy-configmap

View File

@@ -1,6 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- deployment.yaml

33
kubernetes/job.yaml Normal file
View File

@@ -0,0 +1,33 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: descheduler-job
namespace: kube-system
spec:
parallelism: 1
completions: 1
template:
metadata:
name: descheduler-pod
spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler:v0.18.0
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--v"
- "3"
restartPolicy: "Never"
serviceAccountName: descheduler-sa
volumes:
- name: policy-volume
configMap:
name: descheduler-policy-configmap

View File

@@ -1,53 +0,0 @@
---
apiVersion: batch/v1
kind: Job
metadata:
name: descheduler-job
namespace: kube-system
spec:
parallelism: 1
completions: 1
template:
metadata:
name: descheduler-pod
spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: k8s.gcr.io/descheduler/descheduler:v0.25.0
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--v"
- "3"
resources:
requests:
cpu: "500m"
memory: "256Mi"
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 3
periodSeconds: 10
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
restartPolicy: "Never"
serviceAccountName: descheduler-sa
volumes:
- name: policy-volume
configMap:
name: descheduler-policy-configmap

View File

@@ -1,6 +0,0 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- job.yaml

40
kubernetes/rbac.yaml Normal file
View File

@@ -0,0 +1,40 @@
---
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: descheduler-cluster-role
namespace: kube-system
rules:
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
---
apiVersion: v1
kind: ServiceAccount
metadata:
name: descheduler-sa
namespace: kube-system
---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: descheduler-cluster-role-binding
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: descheduler-cluster-role
subjects:
- name: descheduler-sa
kind: ServiceAccount
namespace: kube-system

View File

@@ -1,72 +0,0 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"sigs.k8s.io/descheduler/pkg/version"
)
const (
// DeschedulerSubsystem - subsystem name used by descheduler
DeschedulerSubsystem = "descheduler"
)
var (
PodsEvicted = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: DeschedulerSubsystem,
Name: "pods_evicted",
Help: "Number of evicted pods, by the result, by the strategy, by the namespace, by the node name. 'error' result means a pod could not be evicted",
StabilityLevel: metrics.ALPHA,
}, []string{"result", "strategy", "namespace", "node"})
buildInfo = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: DeschedulerSubsystem,
Name: "build_info",
Help: "Build info about descheduler, including Go version, Descheduler version, Git SHA, Git branch",
ConstLabels: map[string]string{"GoVersion": version.Get().GoVersion, "AppVersion": version.Get().Major + "." + version.Get().Minor, "DeschedulerVersion": version.Get().GitVersion, "GitBranch": version.Get().GitBranch, "GitSha1": version.Get().GitSha1},
StabilityLevel: metrics.ALPHA,
},
)
metricsList = []metrics.Registerable{
PodsEvicted,
buildInfo,
}
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
RegisterMetrics(metricsList...)
})
}
// RegisterMetrics registers a list of metrics.
func RegisterMetrics(extraMetrics ...metrics.Registerable) {
for _, metric := range extraMetrics {
legacyregistry.MustRegister(metric)
}
}

View File

@@ -19,6 +19,8 @@ package api
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
var (
@@ -33,6 +35,12 @@ const GroupName = "descheduler"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
func init() {
if err := addKnownTypes(scheme.Scheme); err != nil {
panic(err)
}
}
// Kind takes an unqualified kind and returns a Group qualified GroupKind
func Kind(kind string) schema.GroupKind {
return SchemeGroupVersion.WithKind(kind).GroupKind()

View File

@@ -28,27 +28,6 @@ type DeschedulerPolicy struct {
// Strategies
Strategies StrategyList
// NodeSelector for a set of nodes to operate over
NodeSelector *string
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
EvictFailedBarePods *bool
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
EvictSystemCriticalPods *bool
// IgnorePVCPods prevents pods with PVCs from being evicted.
IgnorePVCPods *bool
// MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node.
MaxNoOfPodsToEvictPerNode *uint
// MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace.
MaxNoOfPodsToEvictPerNamespace *uint
}
type StrategyName string
@@ -65,42 +44,22 @@ type DeschedulerStrategy struct {
Params *StrategyParameters
}
// Namespaces carries a list of included/excluded namespaces
// for which a given strategy is applicable
type Namespaces struct {
Include []string
Exclude []string
}
// Besides Namespaces only one of its members may be specified
// TODO(jchaloup): move Namespaces ThresholdPriority and ThresholdPriorityClassName to individual strategies
//
// once the policy version is bumped to v1alpha2
// Only one of its members may be specified
type StrategyParameters struct {
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds
NodeAffinityType []string
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts
PodLifeTime *PodLifeTime
MaxPodLifeTimeSeconds *uint
RemoveDuplicates *RemoveDuplicates
FailedPods *FailedPods
IncludeSoftConstraints bool
Namespaces *Namespaces
ThresholdPriority *int32
ThresholdPriorityClassName string
LabelSelector *metav1.LabelSelector
NodeFit bool
IncludePreferNoSchedule bool
ExcludedTaints []string
}
type Percentage float64
type ResourceThresholds map[v1.ResourceName]Percentage
type NodeResourceUtilizationThresholds struct {
UseDeviationThresholds bool
Thresholds ResourceThresholds
TargetThresholds ResourceThresholds
NumberOfNodes int
Thresholds ResourceThresholds
TargetThresholds ResourceThresholds
NumberOfNodes int
}
type PodsHavingTooManyRestarts struct {
@@ -111,23 +70,3 @@ type PodsHavingTooManyRestarts struct {
type RemoveDuplicates struct {
ExcludeOwnerKinds []string
}
type PodLifeTime struct {
MaxPodLifeTimeSeconds *uint
States []string
// Deprecated: Use States instead.
PodStatusPhases []string
}
type FailedPods struct {
ExcludeOwnerKinds []string
MinPodLifetimeSeconds *uint
Reasons []string
IncludingInitContainers bool
}
type PriorityThreshold struct {
Value *int32
Name string
}

View File

@@ -28,27 +28,6 @@ type DeschedulerPolicy struct {
// Strategies
Strategies StrategyList `json:"strategies,omitempty"`
// NodeSelector for a set of nodes to operate over
NodeSelector *string `json:"nodeSelector,omitempty"`
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
EvictFailedBarePods *bool `json:"evictFailedBarePods,omitempty"`
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool `json:"evictLocalStoragePods,omitempty"`
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
EvictSystemCriticalPods *bool `json:"evictSystemCriticalPods,omitempty"`
// IgnorePVCPods prevents pods with PVCs from being evicted.
IgnorePVCPods *bool `json:"ignorePvcPods,omitempty"`
// MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node.
MaxNoOfPodsToEvictPerNode *int `json:"maxNoOfPodsToEvictPerNode,omitempty"`
// MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace.
MaxNoOfPodsToEvictPerNamespace *int `json:"maxNoOfPodsToEvictPerNamespace,omitempty"`
}
type StrategyName string
@@ -65,39 +44,22 @@ type DeschedulerStrategy struct {
Params *StrategyParameters `json:"params,omitempty"`
}
// Namespaces carries a list of included/excluded namespaces
// for which a given strategy is applicable.
type Namespaces struct {
Include []string `json:"include"`
Exclude []string `json:"exclude"`
}
// Besides Namespaces ThresholdPriority and ThresholdPriorityClassName only one of its members may be specified
// Only one of its members may be specified
type StrategyParameters struct {
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"`
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"`
PodLifeTime *PodLifeTime `json:"podLifeTime,omitempty"`
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
RemoveDuplicates *RemoveDuplicates `json:"removeDuplicates,omitempty"`
FailedPods *FailedPods `json:"failedPods,omitempty"`
IncludeSoftConstraints bool `json:"includeSoftConstraints"`
Namespaces *Namespaces `json:"namespaces"`
ThresholdPriority *int32 `json:"thresholdPriority"`
ThresholdPriorityClassName string `json:"thresholdPriorityClassName"`
LabelSelector *metav1.LabelSelector `json:"labelSelector"`
NodeFit bool `json:"nodeFit"`
IncludePreferNoSchedule bool `json:"includePreferNoSchedule"`
ExcludedTaints []string `json:"excludedTaints,omitempty"`
}
type Percentage float64
type ResourceThresholds map[v1.ResourceName]Percentage
type NodeResourceUtilizationThresholds struct {
UseDeviationThresholds bool `json:"useDeviationThresholds,omitempty"`
Thresholds ResourceThresholds `json:"thresholds,omitempty"`
TargetThresholds ResourceThresholds `json:"targetThresholds,omitempty"`
NumberOfNodes int `json:"numberOfNodes,omitempty"`
Thresholds ResourceThresholds `json:"thresholds,omitempty"`
TargetThresholds ResourceThresholds `json:"targetThresholds,omitempty"`
NumberOfNodes int `json:"numberOfNodes,omitempty"`
}
type PodsHavingTooManyRestarts struct {
@@ -108,18 +70,3 @@ type PodsHavingTooManyRestarts struct {
type RemoveDuplicates struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
}
type PodLifeTime struct {
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
States []string `json:"states,omitempty"`
// Deprecated: Use States instead.
PodStatusPhases []string `json:"podStatusPhases,omitempty"`
}
type FailedPods struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds,omitempty"`
Reasons []string `json:"reasons,omitempty"`
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
}

View File

@@ -1,8 +1,7 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -24,7 +23,6 @@ package v1alpha1
import (
unsafe "unsafe"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
conversion "k8s.io/apimachinery/pkg/conversion"
runtime "k8s.io/apimachinery/pkg/runtime"
api "sigs.k8s.io/descheduler/pkg/api"
@@ -57,26 +55,6 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*FailedPods)(nil), (*api.FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_FailedPods_To_api_FailedPods(a.(*FailedPods), b.(*api.FailedPods), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.FailedPods)(nil), (*FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_FailedPods_To_v1alpha1_FailedPods(a.(*api.FailedPods), b.(*FailedPods), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*Namespaces)(nil), (*api.Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_Namespaces_To_api_Namespaces(a.(*Namespaces), b.(*api.Namespaces), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.Namespaces)(nil), (*Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_Namespaces_To_v1alpha1_Namespaces(a.(*api.Namespaces), b.(*Namespaces), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*NodeResourceUtilizationThresholds)(nil), (*api.NodeResourceUtilizationThresholds)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(a.(*NodeResourceUtilizationThresholds), b.(*api.NodeResourceUtilizationThresholds), scope)
}); err != nil {
@@ -87,16 +65,6 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*PodLifeTime)(nil), (*api.PodLifeTime)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime(a.(*PodLifeTime), b.(*api.PodLifeTime), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.PodLifeTime)(nil), (*PodLifeTime)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime(a.(*api.PodLifeTime), b.(*PodLifeTime), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*PodsHavingTooManyRestarts)(nil), (*api.PodsHavingTooManyRestarts)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(a.(*PodsHavingTooManyRestarts), b.(*api.PodsHavingTooManyRestarts), scope)
}); err != nil {
@@ -132,25 +100,6 @@ func RegisterConversions(s *runtime.Scheme) error {
func autoConvert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *DeschedulerPolicy, out *api.DeschedulerPolicy, s conversion.Scope) error {
out.Strategies = *(*api.StrategyList)(unsafe.Pointer(&in.Strategies))
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
out.EvictFailedBarePods = (*bool)(unsafe.Pointer(in.EvictFailedBarePods))
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(uint)
**out = uint(**in)
} else {
out.MaxNoOfPodsToEvictPerNode = nil
}
if in.MaxNoOfPodsToEvictPerNamespace != nil {
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
*out = new(uint)
**out = uint(**in)
} else {
out.MaxNoOfPodsToEvictPerNamespace = nil
}
return nil
}
@@ -161,25 +110,6 @@ func Convert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *Descheduler
func autoConvert_api_DeschedulerPolicy_To_v1alpha1_DeschedulerPolicy(in *api.DeschedulerPolicy, out *DeschedulerPolicy, s conversion.Scope) error {
out.Strategies = *(*StrategyList)(unsafe.Pointer(&in.Strategies))
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
out.EvictFailedBarePods = (*bool)(unsafe.Pointer(in.EvictFailedBarePods))
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(int)
**out = int(**in)
} else {
out.MaxNoOfPodsToEvictPerNode = nil
}
if in.MaxNoOfPodsToEvictPerNamespace != nil {
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
*out = new(int)
**out = int(**in)
} else {
out.MaxNoOfPodsToEvictPerNamespace = nil
}
return nil
}
@@ -212,56 +142,7 @@ func Convert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in *api.Des
return autoConvert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in, out, s)
}
func autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
out.IncludingInitContainers = in.IncludingInitContainers
return nil
}
// Convert_v1alpha1_FailedPods_To_api_FailedPods is an autogenerated conversion function.
func Convert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
return autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in, out, s)
}
func autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
out.IncludingInitContainers = in.IncludingInitContainers
return nil
}
// Convert_api_FailedPods_To_v1alpha1_FailedPods is an autogenerated conversion function.
func Convert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
return autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in, out, s)
}
func autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
return nil
}
// Convert_v1alpha1_Namespaces_To_api_Namespaces is an autogenerated conversion function.
func Convert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
return autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in, out, s)
}
func autoConvert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error {
out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
return nil
}
// Convert_api_Namespaces_To_v1alpha1_Namespaces is an autogenerated conversion function.
func Convert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error {
return autoConvert_api_Namespaces_To_v1alpha1_Namespaces(in, out, s)
}
func autoConvert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(in *NodeResourceUtilizationThresholds, out *api.NodeResourceUtilizationThresholds, s conversion.Scope) error {
out.UseDeviationThresholds = in.UseDeviationThresholds
out.Thresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.Thresholds))
out.TargetThresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.TargetThresholds))
out.NumberOfNodes = in.NumberOfNodes
@@ -274,7 +155,6 @@ func Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtili
}
func autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in *api.NodeResourceUtilizationThresholds, out *NodeResourceUtilizationThresholds, s conversion.Scope) error {
out.UseDeviationThresholds = in.UseDeviationThresholds
out.Thresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.Thresholds))
out.TargetThresholds = *(*ResourceThresholds)(unsafe.Pointer(&in.TargetThresholds))
out.NumberOfNodes = in.NumberOfNodes
@@ -286,30 +166,6 @@ func Convert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtili
return autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in, out, s)
}
func autoConvert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in *PodLifeTime, out *api.PodLifeTime, s conversion.Scope) error {
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.States = *(*[]string)(unsafe.Pointer(&in.States))
out.PodStatusPhases = *(*[]string)(unsafe.Pointer(&in.PodStatusPhases))
return nil
}
// Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime is an autogenerated conversion function.
func Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in *PodLifeTime, out *api.PodLifeTime, s conversion.Scope) error {
return autoConvert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in, out, s)
}
func autoConvert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in *api.PodLifeTime, out *PodLifeTime, s conversion.Scope) error {
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.States = *(*[]string)(unsafe.Pointer(&in.States))
out.PodStatusPhases = *(*[]string)(unsafe.Pointer(&in.PodStatusPhases))
return nil
}
// Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime is an autogenerated conversion function.
func Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in *api.PodLifeTime, out *PodLifeTime, s conversion.Scope) error {
return autoConvert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in, out, s)
}
func autoConvert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in *PodsHavingTooManyRestarts, out *api.PodsHavingTooManyRestarts, s conversion.Scope) error {
out.PodRestartThreshold = in.PodRestartThreshold
out.IncludingInitContainers = in.IncludingInitContainers
@@ -356,17 +212,8 @@ func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *Strat
out.NodeResourceUtilizationThresholds = (*api.NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
out.PodsHavingTooManyRestarts = (*api.PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
out.PodLifeTime = (*api.PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.RemoveDuplicates = (*api.RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
out.FailedPods = (*api.FailedPods)(unsafe.Pointer(in.FailedPods))
out.IncludeSoftConstraints = in.IncludeSoftConstraints
out.Namespaces = (*api.Namespaces)(unsafe.Pointer(in.Namespaces))
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
out.LabelSelector = (*v1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
out.NodeFit = in.NodeFit
out.IncludePreferNoSchedule = in.IncludePreferNoSchedule
out.ExcludedTaints = *(*[]string)(unsafe.Pointer(&in.ExcludedTaints))
return nil
}
@@ -379,17 +226,8 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
out.NodeResourceUtilizationThresholds = (*NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
out.PodsHavingTooManyRestarts = (*PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
out.PodLifeTime = (*PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.RemoveDuplicates = (*RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
out.FailedPods = (*FailedPods)(unsafe.Pointer(in.FailedPods))
out.IncludeSoftConstraints = in.IncludeSoftConstraints
out.Namespaces = (*Namespaces)(unsafe.Pointer(in.Namespaces))
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
out.LabelSelector = (*v1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
out.NodeFit = in.NodeFit
out.IncludePreferNoSchedule = in.IncludePreferNoSchedule
out.ExcludedTaints = *(*[]string)(unsafe.Pointer(&in.ExcludedTaints))
return nil
}

View File

@@ -1,8 +1,7 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -22,7 +21,6 @@ limitations under the License.
package v1alpha1
import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
@@ -37,41 +35,6 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
(*out)[key] = *val.DeepCopy()
}
}
if in.NodeSelector != nil {
in, out := &in.NodeSelector, &out.NodeSelector
*out = new(string)
**out = **in
}
if in.EvictFailedBarePods != nil {
in, out := &in.EvictFailedBarePods, &out.EvictFailedBarePods
*out = new(bool)
**out = **in
}
if in.EvictLocalStoragePods != nil {
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
*out = new(bool)
**out = **in
}
if in.EvictSystemCriticalPods != nil {
in, out := &in.EvictSystemCriticalPods, &out.EvictSystemCriticalPods
*out = new(bool)
**out = **in
}
if in.IgnorePVCPods != nil {
in, out := &in.IgnorePVCPods, &out.IgnorePVCPods
*out = new(bool)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(int)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNamespace != nil {
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
*out = new(int)
**out = **in
}
return
}
@@ -114,63 +77,6 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FailedPods) DeepCopyInto(out *FailedPods) {
*out = *in
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.MinPodLifetimeSeconds != nil {
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
*out = new(uint)
**out = **in
}
if in.Reasons != nil {
in, out := &in.Reasons, &out.Reasons
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
func (in *FailedPods) DeepCopy() *FailedPods {
if in == nil {
return nil
}
out := new(FailedPods)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Namespaces) DeepCopyInto(out *Namespaces) {
*out = *in
if in.Include != nil {
in, out := &in.Include, &out.Include
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.Exclude != nil {
in, out := &in.Exclude, &out.Exclude
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces.
func (in *Namespaces) DeepCopy() *Namespaces {
if in == nil {
return nil
}
out := new(Namespaces)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeResourceUtilizationThresholds) DeepCopyInto(out *NodeResourceUtilizationThresholds) {
*out = *in
@@ -201,37 +107,6 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) {
*out = *in
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.States != nil {
in, out := &in.States, &out.States
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.PodStatusPhases != nil {
in, out := &in.PodStatusPhases, &out.PodStatusPhases
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTime.
func (in *PodLifeTime) DeepCopy() *PodLifeTime {
if in == nil {
return nil
}
out := new(PodLifeTime)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
*out = *in
@@ -331,41 +206,16 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = new(PodsHavingTooManyRestarts)
**out = **in
}
if in.PodLifeTime != nil {
in, out := &in.PodLifeTime, &out.PodLifeTime
*out = new(PodLifeTime)
(*in).DeepCopyInto(*out)
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.RemoveDuplicates != nil {
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
*out = new(RemoveDuplicates)
(*in).DeepCopyInto(*out)
}
if in.FailedPods != nil {
in, out := &in.FailedPods, &out.FailedPods
*out = new(FailedPods)
(*in).DeepCopyInto(*out)
}
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(Namespaces)
(*in).DeepCopyInto(*out)
}
if in.ThresholdPriority != nil {
in, out := &in.ThresholdPriority, &out.ThresholdPriority
*out = new(int32)
**out = **in
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.ExcludedTaints != nil {
in, out := &in.ExcludedTaints, &out.ExcludedTaints
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}

View File

@@ -1,8 +1,7 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -1,8 +1,7 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -22,7 +21,6 @@ limitations under the License.
package api
import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
@@ -37,41 +35,6 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
(*out)[key] = *val.DeepCopy()
}
}
if in.NodeSelector != nil {
in, out := &in.NodeSelector, &out.NodeSelector
*out = new(string)
**out = **in
}
if in.EvictFailedBarePods != nil {
in, out := &in.EvictFailedBarePods, &out.EvictFailedBarePods
*out = new(bool)
**out = **in
}
if in.EvictLocalStoragePods != nil {
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
*out = new(bool)
**out = **in
}
if in.EvictSystemCriticalPods != nil {
in, out := &in.EvictSystemCriticalPods, &out.EvictSystemCriticalPods
*out = new(bool)
**out = **in
}
if in.IgnorePVCPods != nil {
in, out := &in.IgnorePVCPods, &out.IgnorePVCPods
*out = new(bool)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(uint)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNamespace != nil {
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
*out = new(uint)
**out = **in
}
return
}
@@ -114,63 +77,6 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FailedPods) DeepCopyInto(out *FailedPods) {
*out = *in
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.MinPodLifetimeSeconds != nil {
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
*out = new(uint)
**out = **in
}
if in.Reasons != nil {
in, out := &in.Reasons, &out.Reasons
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
func (in *FailedPods) DeepCopy() *FailedPods {
if in == nil {
return nil
}
out := new(FailedPods)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Namespaces) DeepCopyInto(out *Namespaces) {
*out = *in
if in.Include != nil {
in, out := &in.Include, &out.Include
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.Exclude != nil {
in, out := &in.Exclude, &out.Exclude
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces.
func (in *Namespaces) DeepCopy() *Namespaces {
if in == nil {
return nil
}
out := new(Namespaces)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeResourceUtilizationThresholds) DeepCopyInto(out *NodeResourceUtilizationThresholds) {
*out = *in
@@ -201,37 +107,6 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) {
*out = *in
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.States != nil {
in, out := &in.States, &out.States
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.PodStatusPhases != nil {
in, out := &in.PodStatusPhases, &out.PodStatusPhases
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTime.
func (in *PodLifeTime) DeepCopy() *PodLifeTime {
if in == nil {
return nil
}
out := new(PodLifeTime)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
*out = *in
@@ -248,27 +123,6 @@ func (in *PodsHavingTooManyRestarts) DeepCopy() *PodsHavingTooManyRestarts {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PriorityThreshold) DeepCopyInto(out *PriorityThreshold) {
*out = *in
if in.Value != nil {
in, out := &in.Value, &out.Value
*out = new(int32)
**out = **in
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PriorityThreshold.
func (in *PriorityThreshold) DeepCopy() *PriorityThreshold {
if in == nil {
return nil
}
out := new(PriorityThreshold)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemoveDuplicates) DeepCopyInto(out *RemoveDuplicates) {
*out = *in
@@ -352,41 +206,16 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = new(PodsHavingTooManyRestarts)
**out = **in
}
if in.PodLifeTime != nil {
in, out := &in.PodLifeTime, &out.PodLifeTime
*out = new(PodLifeTime)
(*in).DeepCopyInto(*out)
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.RemoveDuplicates != nil {
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
*out = new(RemoveDuplicates)
(*in).DeepCopyInto(*out)
}
if in.FailedPods != nil {
in, out := &in.FailedPods, &out.FailedPods
*out = new(FailedPods)
(*in).DeepCopyInto(*out)
}
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(Namespaces)
(*in).DeepCopyInto(*out)
}
if in.ThresholdPriority != nil {
in, out := &in.ThresholdPriority, &out.ThresholdPriority
*out = new(int32)
**out = **in
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.ExcludedTaints != nil {
in, out := &in.ExcludedTaints, &out.ExcludedTaints
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}

View File

@@ -19,6 +19,8 @@ package componentconfig
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
var (
@@ -32,6 +34,12 @@ const GroupName = "deschedulercomponentconfig"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
func init() {
if err := addKnownTypes(scheme.Scheme); err != nil {
panic(err)
}
}
// Kind takes an unqualified kind and returns a Group qualified GroupKind
func Kind(kind string) schema.GroupKind {
return SchemeGroupVersion.WithKind(kind).GroupKind()

View File

@@ -20,8 +20,6 @@ import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
componentbaseconfig "k8s.io/component-base/config"
registry "k8s.io/component-base/logs/api/v1"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -50,14 +48,4 @@ type DeschedulerConfiguration struct {
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods bool
// IgnorePVCPods sets whether PVC pods should be allowed to be evicted
IgnorePVCPods bool
// LeaderElection starts Deployment using leader election loop
LeaderElection componentbaseconfig.LeaderElectionConfiguration
// Logging specifies the options of logging.
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/api/v1/options.go) for more information.
Logging registry.LoggingConfiguration
}

View File

@@ -1,133 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package componentconfig
import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/descheduler/pkg/api"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// RemovePodsViolatingNodeTaintsArgs holds arguments used to configure the RemovePodsViolatingNodeTaints plugin.
type RemovePodsViolatingNodeTaintsArgs struct {
metav1.TypeMeta
Namespaces *api.Namespaces
LabelSelector *metav1.LabelSelector
IncludePreferNoSchedule bool
ExcludedTaints []string
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// RemoveFailedPodsArgs holds arguments used to configure RemoveFailedPods plugin.
type RemoveFailedPodsArgs struct {
metav1.TypeMeta
Namespaces *api.Namespaces
LabelSelector *metav1.LabelSelector
ExcludeOwnerKinds []string
MinPodLifetimeSeconds *uint
Reasons []string
IncludingInitContainers bool
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// RemovePodsViolatingNodeAffinityArgs holds arguments used to configure RemovePodsViolatingNodeAffinity plugin.
type RemovePodsViolatingNodeAffinityArgs struct {
metav1.TypeMeta
Namespaces *api.Namespaces
LabelSelector *metav1.LabelSelector
NodeAffinityType []string
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// RemovePodsHavingTooManyRestartsArgs holds arguments used to configure RemovePodsHavingTooManyRestarts plugin.
type RemovePodsHavingTooManyRestartsArgs struct {
metav1.TypeMeta
Namespaces *api.Namespaces
LabelSelector *metav1.LabelSelector
PodRestartThreshold int32
IncludingInitContainers bool
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
type RemoveDuplicatesArgs struct {
metav1.TypeMeta
Namespaces *api.Namespaces
ExcludeOwnerKinds []string
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// PodLifeTimeArgs holds arguments used to configure PodLifeTime plugin.
type PodLifeTimeArgs struct {
metav1.TypeMeta
Namespaces *api.Namespaces
LabelSelector *metav1.LabelSelector
MaxPodLifeTimeSeconds *uint
States []string
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// RemovePodsViolatingTopologySpreadConstraintArgs holds arguments used to configure RemovePodsViolatingTopologySpreadConstraint plugin.
type RemovePodsViolatingTopologySpreadConstraintArgs struct {
metav1.TypeMeta
Namespaces *api.Namespaces
LabelSelector *metav1.LabelSelector
IncludeSoftConstraints bool
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
// RemovePodsViolatingInterPodAntiAffinity holds arguments used to configure RemovePodsViolatingInterPodAntiAffinity plugin.
type RemovePodsViolatingInterPodAntiAffinityArgs struct {
metav1.TypeMeta
Namespaces *api.Namespaces
LabelSelector *metav1.LabelSelector
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
type LowNodeUtilizationArgs struct {
metav1.TypeMeta
UseDeviationThresholds bool
Thresholds api.ResourceThresholds
TargetThresholds api.ResourceThresholds
NumberOfNodes int
}
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
type HighNodeUtilizationArgs struct {
metav1.TypeMeta
Thresholds api.ResourceThresholds
NumberOfNodes int
}

View File

@@ -20,8 +20,6 @@ import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
componentbaseconfig "k8s.io/component-base/config"
registry "k8s.io/component-base/logs/api/v1"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -50,14 +48,4 @@ type DeschedulerConfiguration struct {
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods bool `json:"evictLocalStoragePods,omitempty"`
// IgnorePVCPods sets whether PVC pods should be allowed to be evicted
IgnorePVCPods bool `json:"ignorePvcPods,omitempty"`
// LeaderElection starts Deployment using leader election loop
LeaderElection componentbaseconfig.LeaderElectionConfiguration `json:"leaderElection,omitempty"`
// Logging specifies the options of logging.
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/api/v1/options.go) for more information.
Logging registry.LoggingConfiguration `json:"logging,omitempty"`
}

View File

@@ -1,8 +1,7 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -57,9 +56,6 @@ func autoConvert_v1alpha1_DeschedulerConfiguration_To_componentconfig_Deschedule
out.NodeSelector = in.NodeSelector
out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode
out.EvictLocalStoragePods = in.EvictLocalStoragePods
out.IgnorePVCPods = in.IgnorePVCPods
out.LeaderElection = in.LeaderElection
out.Logging = in.Logging
return nil
}
@@ -76,9 +72,6 @@ func autoConvert_componentconfig_DeschedulerConfiguration_To_v1alpha1_Deschedule
out.NodeSelector = in.NodeSelector
out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode
out.EvictLocalStoragePods = in.EvictLocalStoragePods
out.IgnorePVCPods = in.IgnorePVCPods
out.LeaderElection = in.LeaderElection
out.Logging = in.Logging
return nil
}

View File

@@ -1,8 +1,7 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -29,8 +28,6 @@ import (
func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
out.LeaderElection = in.LeaderElection
in.Logging.DeepCopyInto(&out.Logging)
return
}

View File

@@ -1,8 +1,7 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -1,203 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"fmt"
v1 "k8s.io/api/core/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
)
const (
// MinResourcePercentage is the minimum value of a resource's percentage
MinResourcePercentage = 0
// MaxResourcePercentage is the maximum value of a resource's percentage
MaxResourcePercentage = 100
)
// ValidateRemovePodsViolatingNodeTaintsArgs validates RemovePodsViolatingNodeTaints arguments
func ValidateRemovePodsViolatingNodeTaintsArgs(args *componentconfig.RemovePodsViolatingNodeTaintsArgs) error {
return errorsAggregate(
validateNamespaceArgs(args.Namespaces),
validateLabelSelectorArgs(args.LabelSelector),
)
}
// ValidateRemoveFailedPodsArgs validates RemoveFailedPods arguments
func ValidateRemoveFailedPodsArgs(args *componentconfig.RemoveFailedPodsArgs) error {
return errorsAggregate(
validateNamespaceArgs(args.Namespaces),
validateLabelSelectorArgs(args.LabelSelector),
)
}
// ValidateRemovePodsHavingTooManyRestartsArgs validates RemovePodsHavingTooManyRestarts arguments
func ValidateRemovePodsHavingTooManyRestartsArgs(args *componentconfig.RemovePodsHavingTooManyRestartsArgs) error {
return errorsAggregate(
validateNamespaceArgs(args.Namespaces),
validateLabelSelectorArgs(args.LabelSelector),
validatePodRestartThreshold(args.PodRestartThreshold),
)
}
// ValidateRemovePodsViolatingNodeAffinityArgs validates RemovePodsViolatingNodeAffinity arguments
func ValidateRemovePodsViolatingNodeAffinityArgs(args *componentconfig.RemovePodsViolatingNodeAffinityArgs) error {
var err error
if args == nil || len(args.NodeAffinityType) == 0 {
err = fmt.Errorf("nodeAffinityType needs to be set")
}
return errorsAggregate(
err,
validateNamespaceArgs(args.Namespaces),
validateLabelSelectorArgs(args.LabelSelector),
)
}
// ValidatePodLifeTimeArgs validates PodLifeTime arguments
func ValidatePodLifeTimeArgs(args *componentconfig.PodLifeTimeArgs) error {
var err error
if args.MaxPodLifeTimeSeconds == nil {
err = fmt.Errorf("MaxPodLifeTimeSeconds not set")
}
return errorsAggregate(
err,
validateNamespaceArgs(args.Namespaces),
validateLabelSelectorArgs(args.LabelSelector),
validatePodLifeTimeStates(args.States),
)
}
func ValidateRemoveDuplicatesArgs(args *componentconfig.RemoveDuplicatesArgs) error {
return validateNamespaceArgs(args.Namespaces)
}
// ValidateRemovePodsViolatingTopologySpreadConstraintArgs validates RemovePodsViolatingTopologySpreadConstraint arguments
func ValidateRemovePodsViolatingTopologySpreadConstraintArgs(args *componentconfig.RemovePodsViolatingTopologySpreadConstraintArgs) error {
return errorsAggregate(
validateNamespaceArgs(args.Namespaces),
validateLabelSelectorArgs(args.LabelSelector),
)
}
// ValidateRemovePodsViolatingInterPodAntiAffinityArgs validates ValidateRemovePodsViolatingInterPodAntiAffinity arguments
func ValidateRemovePodsViolatingInterPodAntiAffinityArgs(args *componentconfig.RemovePodsViolatingInterPodAntiAffinityArgs) error {
return errorsAggregate(
validateNamespaceArgs(args.Namespaces),
validateLabelSelectorArgs(args.LabelSelector),
)
}
// errorsAggregate converts all arg validation errors to a single error interface.
// if no errors, it will return nil.
func errorsAggregate(errors ...error) error {
return utilerrors.NewAggregate(errors)
}
func validateNamespaceArgs(namespaces *api.Namespaces) error {
// At most one of include/exclude can be set
if namespaces != nil && len(namespaces.Include) > 0 && len(namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
return nil
}
func validateLabelSelectorArgs(labelSelector *metav1.LabelSelector) error {
if labelSelector != nil {
if _, err := metav1.LabelSelectorAsSelector(labelSelector); err != nil {
return fmt.Errorf("failed to get label selectors from strategy's params: %+v", err)
}
}
return nil
}
func validatePodRestartThreshold(podRestartThreshold int32) error {
if podRestartThreshold < 1 {
return fmt.Errorf("PodsHavingTooManyRestarts threshold not set")
}
return nil
}
func validatePodLifeTimeStates(states []string) error {
podLifeTimeAllowedStates := sets.NewString(
string(v1.PodRunning),
string(v1.PodPending),
// Container state reasons: https://github.com/kubernetes/kubernetes/blob/release-1.24/pkg/kubelet/kubelet_pods.go#L76-L79
"PodInitializing",
"ContainerCreating",
)
if !podLifeTimeAllowedStates.HasAll(states...) {
return fmt.Errorf("states must be one of %v", podLifeTimeAllowedStates.List())
}
return nil
}
func ValidateHighNodeUtilizationArgs(args *componentconfig.HighNodeUtilizationArgs) error {
return validateThresholds(args.Thresholds)
}
func ValidateLowNodeUtilizationArgs(args *componentconfig.LowNodeUtilizationArgs) error {
return validateLowNodeUtilizationThresholds(args.Thresholds, args.TargetThresholds, args.UseDeviationThresholds)
}
func validateLowNodeUtilizationThresholds(thresholds, targetThresholds api.ResourceThresholds, useDeviationThresholds bool) error {
// validate thresholds and targetThresholds config
if err := validateThresholds(thresholds); err != nil {
return fmt.Errorf("thresholds config is not valid: %v", err)
}
if err := validateThresholds(targetThresholds); err != nil {
return fmt.Errorf("targetThresholds config is not valid: %v", err)
}
// validate if thresholds and targetThresholds have same resources configured
if len(thresholds) != len(targetThresholds) {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
}
for resourceName, value := range thresholds {
if targetValue, ok := targetThresholds[resourceName]; !ok {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
} else if value > targetValue && !useDeviationThresholds {
return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
}
}
return nil
}
// validateThresholds checks if thresholds have valid resource name and resource percentage configured
func validateThresholds(thresholds api.ResourceThresholds) error {
if len(thresholds) == 0 {
return fmt.Errorf("no resource threshold is configured")
}
for name, percent := range thresholds {
if percent < MinResourcePercentage || percent > MaxResourcePercentage {
return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage)
}
}
return nil
}

View File

@@ -1,331 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package validation
import (
"fmt"
"testing"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
)
func TestValidateRemovePodsViolatingNodeTaintsArgs(t *testing.T) {
testCases := []struct {
description string
args *componentconfig.RemovePodsViolatingNodeTaintsArgs
expectError bool
}{
{
description: "valid namespace args, no errors",
args: &componentconfig.RemovePodsViolatingNodeTaintsArgs{
Namespaces: &api.Namespaces{
Include: []string{"default"},
},
},
expectError: false,
},
{
description: "invalid namespaces args, expects error",
args: &componentconfig.RemovePodsViolatingNodeTaintsArgs{
Namespaces: &api.Namespaces{
Include: []string{"default"},
Exclude: []string{"kube-system"},
},
},
expectError: true,
},
{
description: "valid label selector args, no errors",
args: &componentconfig.RemovePodsViolatingNodeTaintsArgs{
LabelSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{"role.kubernetes.io/node": ""},
},
},
expectError: false,
},
{
description: "invalid label selector args, expects errors",
args: &componentconfig.RemovePodsViolatingNodeTaintsArgs{
LabelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Operator: metav1.LabelSelectorOpIn,
},
},
},
},
expectError: true,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
err := ValidateRemovePodsViolatingNodeTaintsArgs(tc.args)
hasError := err != nil
if tc.expectError != hasError {
t.Error("unexpected arg validation behavior")
}
})
}
}
func TestValidateRemovePodsViolatingNodeAffinityArgs(t *testing.T) {
testCases := []struct {
description string
args *componentconfig.RemovePodsViolatingNodeAffinityArgs
expectError bool
}{
{
description: "nil NodeAffinityType args, expects errors",
args: &componentconfig.RemovePodsViolatingNodeAffinityArgs{
NodeAffinityType: nil,
},
expectError: true,
},
{
description: "empty NodeAffinityType args, expects errors",
args: &componentconfig.RemovePodsViolatingNodeAffinityArgs{
NodeAffinityType: []string{},
},
expectError: true,
},
{
description: "valid NodeAffinityType args, no errors",
args: &componentconfig.RemovePodsViolatingNodeAffinityArgs{
NodeAffinityType: []string{"requiredDuringSchedulingIgnoredDuringExecution"},
},
expectError: false,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
err := ValidateRemovePodsViolatingNodeAffinityArgs(tc.args)
hasError := err != nil
if tc.expectError != hasError {
t.Error("unexpected arg validation behavior")
}
})
}
}
func TestValidateRemovePodLifeTimeArgs(t *testing.T) {
testCases := []struct {
description string
args *componentconfig.PodLifeTimeArgs
expectError bool
}{
{
description: "valid arg, no errors",
args: &componentconfig.PodLifeTimeArgs{
MaxPodLifeTimeSeconds: func(i uint) *uint { return &i }(1),
States: []string{string(v1.PodRunning)},
},
expectError: false,
},
{
description: "nil MaxPodLifeTimeSeconds arg, expects errors",
args: &componentconfig.PodLifeTimeArgs{
MaxPodLifeTimeSeconds: nil,
},
expectError: true,
},
{
description: "invalid pod state arg, expects errors",
args: &componentconfig.PodLifeTimeArgs{
States: []string{string(v1.NodeRunning)},
},
expectError: true,
},
}
for _, tc := range testCases {
t.Run(tc.description, func(t *testing.T) {
err := ValidatePodLifeTimeArgs(tc.args)
hasError := err != nil
if tc.expectError != hasError {
t.Error("unexpected arg validation behavior")
}
})
}
}
func TestValidateLowNodeUtilizationPluginConfig(t *testing.T) {
var extendedResource = v1.ResourceName("example.com/foo")
tests := []struct {
name string
thresholds api.ResourceThresholds
targetThresholds api.ResourceThresholds
errInfo error
}{
{
name: "passing invalid thresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 120,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
},
{
name: "thresholds and targetThresholds configured different num of resources",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
v1.ResourcePods: 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "thresholds and targetThresholds configured different resources",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourcePods: 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "thresholds' CPU config value is greater than targetThresholds'",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 90,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", v1.ResourceCPU),
},
{
name: "only thresholds configured extended resource",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
extendedResource: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "only targetThresholds configured extended resource",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
extendedResource: 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "thresholds and targetThresholds configured different extended resources",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
extendedResource: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
"example.com/bar": 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "thresholds' extended resource config value is greater than targetThresholds'",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
extendedResource: 90,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
extendedResource: 20,
},
errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", extendedResource),
},
{
name: "passing valid plugin config",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: nil,
},
{
name: "passing valid plugin config with extended resource",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
extendedResource: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
extendedResource: 80,
},
errInfo: nil,
},
}
for _, testCase := range tests {
args := &componentconfig.LowNodeUtilizationArgs{
Thresholds: testCase.thresholds,
TargetThresholds: testCase.targetThresholds,
}
validateErr := validateLowNodeUtilizationThresholds(args.Thresholds, args.TargetThresholds, false)
if validateErr == nil || testCase.errInfo == nil {
if validateErr != testCase.errInfo {
t.Errorf("expected validity of plugin config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
} else if validateErr.Error() != testCase.errInfo.Error() {
t.Errorf("expected validity of plugin config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
}
}

View File

@@ -1,8 +1,7 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2022 The Kubernetes Authors.
Copyright 2020 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -22,17 +21,13 @@ limitations under the License.
package componentconfig
import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
api "sigs.k8s.io/descheduler/pkg/api"
)
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
out.LeaderElection = in.LeaderElection
in.Logging.DeepCopyInto(&out.Logging)
return
}
@@ -53,389 +48,3 @@ func (in *DeschedulerConfiguration) DeepCopyObject() runtime.Object {
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Thresholds != nil {
in, out := &in.Thresholds, &out.Thresholds
*out = make(api.ResourceThresholds, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new HighNodeUtilizationArgs.
func (in *HighNodeUtilizationArgs) DeepCopy() *HighNodeUtilizationArgs {
if in == nil {
return nil
}
out := new(HighNodeUtilizationArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *HighNodeUtilizationArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Thresholds != nil {
in, out := &in.Thresholds, &out.Thresholds
*out = make(api.ResourceThresholds, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
if in.TargetThresholds != nil {
in, out := &in.TargetThresholds, &out.TargetThresholds
*out = make(api.ResourceThresholds, len(*in))
for key, val := range *in {
(*out)[key] = val
}
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new LowNodeUtilizationArgs.
func (in *LowNodeUtilizationArgs) DeepCopy() *LowNodeUtilizationArgs {
if in == nil {
return nil
}
out := new(LowNodeUtilizationArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *LowNodeUtilizationArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodLifeTimeArgs) DeepCopyInto(out *PodLifeTimeArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.States != nil {
in, out := &in.States, &out.States
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTimeArgs.
func (in *PodLifeTimeArgs) DeepCopy() *PodLifeTimeArgs {
if in == nil {
return nil
}
out := new(PodLifeTimeArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *PodLifeTimeArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemoveDuplicatesArgs) DeepCopyInto(out *RemoveDuplicatesArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoveDuplicatesArgs.
func (in *RemoveDuplicatesArgs) DeepCopy() *RemoveDuplicatesArgs {
if in == nil {
return nil
}
out := new(RemoveDuplicatesArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *RemoveDuplicatesArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemoveFailedPodsArgs) DeepCopyInto(out *RemoveFailedPodsArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.MinPodLifetimeSeconds != nil {
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
*out = new(uint)
**out = **in
}
if in.Reasons != nil {
in, out := &in.Reasons, &out.Reasons
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemoveFailedPodsArgs.
func (in *RemoveFailedPodsArgs) DeepCopy() *RemoveFailedPodsArgs {
if in == nil {
return nil
}
out := new(RemoveFailedPodsArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *RemoveFailedPodsArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopyInto(out *RemovePodsHavingTooManyRestartsArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsHavingTooManyRestartsArgs.
func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopy() *RemovePodsHavingTooManyRestartsArgs {
if in == nil {
return nil
}
out := new(RemovePodsHavingTooManyRestartsArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *RemovePodsHavingTooManyRestartsArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemovePodsViolatingInterPodAntiAffinityArgs) DeepCopyInto(out *RemovePodsViolatingInterPodAntiAffinityArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingInterPodAntiAffinityArgs.
func (in *RemovePodsViolatingInterPodAntiAffinityArgs) DeepCopy() *RemovePodsViolatingInterPodAntiAffinityArgs {
if in == nil {
return nil
}
out := new(RemovePodsViolatingInterPodAntiAffinityArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *RemovePodsViolatingInterPodAntiAffinityArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemovePodsViolatingNodeAffinityArgs) DeepCopyInto(out *RemovePodsViolatingNodeAffinityArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.NodeAffinityType != nil {
in, out := &in.NodeAffinityType, &out.NodeAffinityType
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingNodeAffinityArgs.
func (in *RemovePodsViolatingNodeAffinityArgs) DeepCopy() *RemovePodsViolatingNodeAffinityArgs {
if in == nil {
return nil
}
out := new(RemovePodsViolatingNodeAffinityArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *RemovePodsViolatingNodeAffinityArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemovePodsViolatingNodeTaintsArgs) DeepCopyInto(out *RemovePodsViolatingNodeTaintsArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
if in.ExcludedTaints != nil {
in, out := &in.ExcludedTaints, &out.ExcludedTaints
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingNodeTaintsArgs.
func (in *RemovePodsViolatingNodeTaintsArgs) DeepCopy() *RemovePodsViolatingNodeTaintsArgs {
if in == nil {
return nil
}
out := new(RemovePodsViolatingNodeTaintsArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *RemovePodsViolatingNodeTaintsArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *RemovePodsViolatingTopologySpreadConstraintArgs) DeepCopyInto(out *RemovePodsViolatingTopologySpreadConstraintArgs) {
*out = *in
out.TypeMeta = in.TypeMeta
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(api.Namespaces)
(*in).DeepCopyInto(*out)
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RemovePodsViolatingTopologySpreadConstraintArgs.
func (in *RemovePodsViolatingTopologySpreadConstraintArgs) DeepCopy() *RemovePodsViolatingTopologySpreadConstraintArgs {
if in == nil {
return nil
}
out := new(RemovePodsViolatingTopologySpreadConstraintArgs)
in.DeepCopyInto(out)
return out
}
// DeepCopyObject is an autogenerated deepcopy function, copying the receiver, creating a new runtime.Object.
func (in *RemovePodsViolatingTopologySpreadConstraintArgs) DeepCopyObject() runtime.Object {
if c := in.DeepCopy(); c != nil {
return c
}
return nil
}

View File

@@ -26,7 +26,7 @@ import (
"k8s.io/client-go/tools/clientcmd"
)
func CreateClient(kubeconfig string, userAgt string) (clientset.Interface, error) {
func CreateClient(kubeconfig string) (clientset.Interface, error) {
var cfg *rest.Config
if len(kubeconfig) != 0 {
master, err := GetMasterFromKubeconfig(kubeconfig)
@@ -47,11 +47,7 @@ func CreateClient(kubeconfig string, userAgt string) (clientset.Interface, error
}
}
if len(userAgt) != 0 {
return clientset.NewForConfig(rest.AddUserAgent(cfg, userAgt))
} else {
return clientset.NewForConfig(cfg)
}
return clientset.NewForConfig(cfg)
}
func GetMasterFromKubeconfig(filename string) (string, error) {

View File

@@ -20,43 +20,28 @@ import (
"context"
"fmt"
"k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
corev1informers "k8s.io/client-go/informers/core/v1"
schedulingv1informers "k8s.io/client-go/informers/scheduling/v1"
clientset "k8s.io/client-go/kubernetes"
fakeclientset "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/metrics"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/client"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/framework"
"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
)
func Run(ctx context.Context, rs *options.DeschedulerServer) error {
metrics.Register()
rsclient, eventClient, err := createClients(rs.KubeconfigFile)
func Run(rs *options.DeschedulerServer) error {
ctx := context.Background()
rsclient, err := client.CreateClient(rs.KubeconfigFile)
if err != nil {
return err
}
rs.Client = rsclient
rs.EventClient = eventClient
deschedulerPolicy, err := LoadPolicyConfig(rs.PolicyConfigFile)
if err != nil {
@@ -71,379 +56,63 @@ func Run(ctx context.Context, rs *options.DeschedulerServer) error {
return err
}
runFn := func() error {
return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion)
}
if rs.LeaderElection.LeaderElect && rs.DeschedulingInterval.Seconds() == 0 {
return fmt.Errorf("leaderElection must be used with deschedulingInterval")
}
if rs.LeaderElection.LeaderElect && !rs.DryRun {
if err := NewLeaderElection(runFn, rsclient, &rs.LeaderElection, ctx); err != nil {
return fmt.Errorf("leaderElection: %w", err)
}
return nil
}
return runFn()
stopChannel := make(chan struct{})
return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion, stopChannel)
}
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, evictorFilter framework.EvictorPlugin, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc)
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor)
func cachedClient(
realClient clientset.Interface,
podInformer corev1informers.PodInformer,
nodeInformer corev1informers.NodeInformer,
namespaceInformer corev1informers.NamespaceInformer,
priorityClassInformer schedulingv1informers.PriorityClassInformer,
) (clientset.Interface, error) {
fakeClient := fakeclientset.NewSimpleClientset()
// simulate a pod eviction by deleting a pod
fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() == "eviction" {
createAct, matched := action.(core.CreateActionImpl)
if !matched {
return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
}
eviction, matched := createAct.Object.(*policy.Eviction)
if !matched {
return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
}
if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
}
return true, nil, nil
}
// fallback to the default reactor
return false, nil, nil
})
klog.V(3).Infof("Pulling resources for the cached client from the cluster")
pods, err := podInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list pods: %v", err)
}
for _, item := range pods {
if _, err := fakeClient.CoreV1().Pods(item.Namespace).Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy pod: %v", err)
}
}
nodes, err := nodeInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list nodes: %v", err)
}
for _, item := range nodes {
if _, err := fakeClient.CoreV1().Nodes().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy node: %v", err)
}
}
namespaces, err := namespaceInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list namespaces: %v", err)
}
for _, item := range namespaces {
if _, err := fakeClient.CoreV1().Namespaces().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy node: %v", err)
}
}
priorityClasses, err := priorityClassInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list priorityclasses: %v", err)
}
for _, item := range priorityClasses {
if _, err := fakeClient.SchedulingV1().PriorityClasses().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy priorityclass: %v", err)
}
}
return fakeClient, nil
}
// evictorImpl implements the Evictor interface so plugins
// can evict a pod without importing a specific pod evictor
type evictorImpl struct {
podEvictor *evictions.PodEvictor
evictorFilter framework.EvictorPlugin
}
var _ framework.Evictor = &evictorImpl{}
// Filter checks if a pod can be evicted
func (ei *evictorImpl) Filter(pod *v1.Pod) bool {
return ei.evictorFilter.Filter(pod)
}
// Evict evicts a pod (no pre-check performed)
func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.EvictOptions) bool {
return ei.podEvictor.EvictPod(ctx, pod, opts)
}
func (ei *evictorImpl) NodeLimitExceeded(node *v1.Node) bool {
return ei.podEvictor.NodeLimitExceeded(node)
}
// handleImpl implements the framework handle which gets passed to plugins
type handleImpl struct {
clientSet clientset.Interface
getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc
sharedInformerFactory informers.SharedInformerFactory
evictor *evictorImpl
}
var _ framework.Handle = &handleImpl{}
// ClientSet retrieves kube client set
func (hi *handleImpl) ClientSet() clientset.Interface {
return hi.clientSet
}
// GetPodsAssignedToNodeFunc retrieves GetPodsAssignedToNodeFunc implementation
func (hi *handleImpl) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc {
return hi.getPodsAssignedToNodeFunc
}
// SharedInformerFactory retrieves shared informer factory
func (hi *handleImpl) SharedInformerFactory() informers.SharedInformerFactory {
return hi.sharedInformerFactory
}
// Evictor retrieves evictor so plugins can filter and evict pods
func (hi *handleImpl) Evictor() framework.Evictor {
return hi.evictor
}
func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string) error {
func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, stopChannel chan struct{}) error {
sharedInformerFactory := informers.NewSharedInformerFactory(rs.Client, 0)
nodeInformer := sharedInformerFactory.Core().V1().Nodes()
podInformer := sharedInformerFactory.Core().V1().Pods()
namespaceInformer := sharedInformerFactory.Core().V1().Namespaces()
priorityClassInformer := sharedInformerFactory.Scheduling().V1().PriorityClasses()
ctx, cancel := context.WithCancel(ctx)
defer cancel()
sharedInformerFactory.Start(stopChannel)
sharedInformerFactory.WaitForCacheSync(stopChannel)
// create the informers
namespaceInformer.Informer()
priorityClassInformer.Informer()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
return fmt.Errorf("build get pods assigned to node function error: %v", err)
strategyFuncs := map[string]strategyFunction{
"RemoveDuplicates": strategies.RemoveDuplicatePods,
"LowNodeUtilization": strategies.LowNodeUtilization,
"RemovePodsViolatingInterPodAntiAffinity": strategies.RemovePodsViolatingInterPodAntiAffinity,
"RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity,
"RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints,
"RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts,
"PodLifeTime": strategies.PodLifeTime,
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
strategyFuncs := map[api.StrategyName]strategyFunction{
"RemoveDuplicates": nil,
"LowNodeUtilization": nil,
"HighNodeUtilization": nil,
"RemovePodsViolatingInterPodAntiAffinity": nil,
"RemovePodsViolatingNodeAffinity": nil,
"RemovePodsViolatingNodeTaints": nil,
"RemovePodsHavingTooManyRestarts": nil,
"PodLifeTime": nil,
"RemovePodsViolatingTopologySpreadConstraint": nil,
"RemoveFailedPods": nil,
}
var nodeSelector string
if deschedulerPolicy.NodeSelector != nil {
nodeSelector = *deschedulerPolicy.NodeSelector
}
var evictLocalStoragePods bool
if deschedulerPolicy.EvictLocalStoragePods != nil {
evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods
}
evictBarePods := false
if deschedulerPolicy.EvictFailedBarePods != nil {
evictBarePods = *deschedulerPolicy.EvictFailedBarePods
if evictBarePods {
klog.V(1).InfoS("Warning: EvictFailedBarePods is set to True. This could cause eviction of pods without ownerReferences.")
}
}
evictSystemCriticalPods := false
if deschedulerPolicy.EvictSystemCriticalPods != nil {
evictSystemCriticalPods = *deschedulerPolicy.EvictSystemCriticalPods
if evictSystemCriticalPods {
klog.V(1).InfoS("Warning: EvictSystemCriticalPods is set to True. This could cause eviction of Kubernetes system pods.")
}
}
ignorePvcPods := false
if deschedulerPolicy.IgnorePVCPods != nil {
ignorePvcPods = *deschedulerPolicy.IgnorePVCPods
}
var eventClient clientset.Interface
if rs.DryRun {
eventClient = fakeclientset.NewSimpleClientset()
} else {
eventClient = rs.Client
}
eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, eventClient)
defer eventBroadcaster.Shutdown()
wait.NonSlidingUntil(func() {
nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, nodeSelector)
wait.Until(func() {
nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, rs.NodeSelector, stopChannel)
if err != nil {
klog.V(1).InfoS("Unable to get ready nodes", "err", err)
cancel()
klog.V(1).Infof("Unable to get ready nodes: %v", err)
close(stopChannel)
return
}
if len(nodes) <= 1 {
klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
cancel()
klog.V(1).Infof("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
close(stopChannel)
return
}
var podEvictorClient clientset.Interface
// When the dry mode is enable, collect all the relevant objects (mostly pods) under a fake client.
// So when evicting pods while running multiple strategies in a row have the cummulative effect
// as is when evicting pods for real.
if rs.DryRun {
klog.V(3).Infof("Building a cached client from the cluster for the dry run")
// Create a new cache so we start from scratch without any leftovers
fakeClient, err := cachedClient(rs.Client, podInformer, nodeInformer, namespaceInformer, priorityClassInformer)
if err != nil {
klog.Error(err)
return
}
fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods())
if err != nil {
klog.Errorf("build get pods assigned to node function error: %v", err)
return
}
fakeCtx, cncl := context.WithCancel(context.TODO())
defer cncl()
fakeSharedInformerFactory.Start(fakeCtx.Done())
fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())
podEvictorClient = fakeClient
} else {
podEvictorClient = rs.Client
}
klog.V(3).Infof("Building a pod evictor")
podEvictor := evictions.NewPodEvictor(
podEvictorClient,
rs.Client,
evictionPolicyGroupVersion,
rs.DryRun,
deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
rs.MaxNoOfPodsToEvictPerNode,
nodes,
!rs.DisableMetrics,
eventRecorder,
rs.EvictLocalStoragePods,
)
for name, strategy := range deschedulerPolicy.Strategies {
if f, ok := strategyFuncs[name]; ok {
if strategy.Enabled {
params := strategy.Params
if params == nil {
params = &api.StrategyParameters{}
}
nodeFit := false
if name != "PodLifeTime" {
nodeFit = params.NodeFit
}
// TODO(jchaloup): once all strategies are migrated move this check under
// the default evictor args validation
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
klog.V(1).ErrorS(fmt.Errorf("priority threshold misconfigured"), "only one of priorityThreshold fields can be set", "pluginName", name)
continue
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, rs.Client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
continue
}
defaultevictorArgs := &defaultevictor.DefaultEvictorArgs{
EvictLocalStoragePods: evictLocalStoragePods,
EvictSystemCriticalPods: evictSystemCriticalPods,
IgnorePvcPods: ignorePvcPods,
EvictFailedBarePods: evictBarePods,
NodeFit: nodeFit,
PriorityThreshold: &api.PriorityThreshold{
Value: &thresholdPriority,
},
}
evictorFilter, _ := defaultevictor.New(
defaultevictorArgs,
&handleImpl{
clientSet: rs.Client,
getPodsAssignedToNodeFunc: getPodsAssignedToNode,
sharedInformerFactory: sharedInformerFactory,
},
)
handle := &handleImpl{
clientSet: rs.Client,
getPodsAssignedToNodeFunc: getPodsAssignedToNode,
sharedInformerFactory: sharedInformerFactory,
evictor: &evictorImpl{
podEvictor: podEvictor,
evictorFilter: evictorFilter.(framework.EvictorPlugin),
},
}
// TODO: strategyName should be accessible from within the strategy using a framework
// handle or function which the Evictor has access to. For migration/in-progress framework
// work, we are currently passing this via context. To be removed
// (See discussion thread https://github.com/kubernetes-sigs/descheduler/pull/885#discussion_r919962292)
childCtx := context.WithValue(ctx, "strategyName", string(name))
if pgFnc, exists := pluginsMap[string(name)]; exists {
pgFnc(childCtx, nodes, params, handle)
} else {
f(childCtx, rs.Client, strategy, nodes, podEvictor, evictorFilter.(framework.EvictorPlugin), getPodsAssignedToNode)
}
}
} else {
klog.ErrorS(fmt.Errorf("unknown strategy name"), "skipping strategy", "strategy", name)
for name, f := range strategyFuncs {
if strategy := deschedulerPolicy.Strategies[api.StrategyName(name)]; strategy.Enabled {
f(ctx, rs.Client, strategy, nodes, podEvictor)
}
}
klog.V(1).InfoS("Number of evicted pods", "totalEvicted", podEvictor.TotalEvicted())
// If there was no interval specified, send a signal to the stopChannel to end the wait.Until loop after 1 iteration
if rs.DeschedulingInterval.Seconds() == 0 {
cancel()
close(stopChannel)
}
}, rs.DeschedulingInterval, ctx.Done())
}, rs.DeschedulingInterval, stopChannel)
return nil
}
func createClients(kubeconfig string) (clientset.Interface, clientset.Interface, error) {
kClient, err := client.CreateClient(kubeconfig, "descheduler")
if err != nil {
return nil, nil, err
}
eventClient, err := client.CreateClient(kubeconfig, "")
if err != nil {
return nil, nil, err
}
return kClient, eventClient, nil
}

View File

@@ -3,15 +3,14 @@ package descheduler
import (
"context"
"fmt"
"strings"
"testing"
"time"
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
fakeclientset "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/test"
@@ -28,7 +27,6 @@ func TestTaintsUpdated(t *testing.T) {
}
client := fakeclientset.NewSimpleClientset(n1, n2, p1)
eventClient := fakeclientset.NewSimpleClientset(n1, n2, p1)
dp := &api.DeschedulerPolicy{
Strategies: api.StrategyList{
"RemovePodsViolatingNodeTaints": api.DeschedulerStrategy{
@@ -37,13 +35,21 @@ func TestTaintsUpdated(t *testing.T) {
},
}
rs, err := options.NewDeschedulerServer()
if err != nil {
t.Fatalf("Unable to initialize server: %v", err)
}
rs.Client = client
rs.EventClient = eventClient
stopChannel := make(chan struct{})
defer close(stopChannel)
rs := options.NewDeschedulerServer()
rs.Client = client
rs.DeschedulingInterval = 100 * time.Millisecond
go func() {
err := RunDeschedulerStrategies(ctx, rs, dp, "v1beta1", stopChannel)
if err != nil {
t.Fatalf("Unable to run descheduler strategies: %v", err)
}
}()
// Wait for few cycles and then verify the only pod still exists
time.Sleep(300 * time.Millisecond)
pods, err := client.CoreV1().Pods(p1.Namespace).List(ctx, metav1.ListOptions{})
if err != nil {
t.Errorf("Unable to list pods: %v", err)
@@ -65,154 +71,24 @@ func TestTaintsUpdated(t *testing.T) {
t.Fatalf("Unable to update node: %v\n", err)
}
var evictedPods []string
client.PrependReactor("create", "pods", podEvictionReactionFuc(&evictedPods))
if err := RunDeschedulerStrategies(ctx, rs, dp, "v1"); err != nil {
t.Fatalf("Unable to run descheduler strategies: %v", err)
}
if len(evictedPods) != 1 {
t.Fatalf("Unable to evict pod, node taint did not get propagated to descheduler strategies %v\n", err)
}
}
func TestDuplicate(t *testing.T) {
ctx := context.Background()
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p1.Namespace = "dev"
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
p2.Namespace = "dev"
p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
p3.Namespace = "dev"
ownerRef1 := test.GetReplicaSetOwnerRefList()
p1.ObjectMeta.OwnerReferences = ownerRef1
p2.ObjectMeta.OwnerReferences = ownerRef1
p3.ObjectMeta.OwnerReferences = ownerRef1
client := fakeclientset.NewSimpleClientset(node1, node2, p1, p2, p3)
eventClient := fakeclientset.NewSimpleClientset(node1, node2, p1, p2, p3)
dp := &api.DeschedulerPolicy{
Strategies: api.StrategyList{
"RemoveDuplicates": api.DeschedulerStrategy{
Enabled: true,
},
},
}
rs, err := options.NewDeschedulerServer()
if err != nil {
t.Fatalf("Unable to initialize server: %v", err)
}
rs.Client = client
rs.EventClient = eventClient
pods, err := client.CoreV1().Pods(p1.Namespace).List(ctx, metav1.ListOptions{})
if err != nil {
t.Errorf("Unable to list pods: %v", err)
}
if len(pods.Items) != 3 {
t.Errorf("Pods number should be 3 before evict")
}
var evictedPods []string
client.PrependReactor("create", "pods", podEvictionReactionFuc(&evictedPods))
if err := RunDeschedulerStrategies(ctx, rs, dp, "v1"); err != nil {
t.Fatalf("Unable to run descheduler strategies: %v", err)
}
if len(evictedPods) == 0 {
t.Fatalf("Unable to evict pod, node taint did not get propagated to descheduler strategies %v\n", err)
}
}
func TestRootCancel(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
client := fakeclientset.NewSimpleClientset(n1, n2)
eventClient := fakeclientset.NewSimpleClientset(n1, n2)
dp := &api.DeschedulerPolicy{
Strategies: api.StrategyList{}, // no strategies needed for this test
}
rs, err := options.NewDeschedulerServer()
if err != nil {
t.Fatalf("Unable to initialize server: %v", err)
}
rs.Client = client
rs.EventClient = eventClient
rs.DeschedulingInterval = 100 * time.Millisecond
errChan := make(chan error, 1)
defer close(errChan)
go func() {
err := RunDeschedulerStrategies(ctx, rs, dp, "v1")
errChan <- err
}()
cancel()
select {
case err := <-errChan:
if err != nil {
t.Fatalf("Unable to run descheduler strategies: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatal("Root ctx should have canceled immediately")
}
}
func TestRootCancelWithNoInterval(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
client := fakeclientset.NewSimpleClientset(n1, n2)
eventClient := fakeclientset.NewSimpleClientset(n1, n2)
dp := &api.DeschedulerPolicy{
Strategies: api.StrategyList{}, // no strategies needed for this test
}
rs, err := options.NewDeschedulerServer()
if err != nil {
t.Fatalf("Unable to initialize server: %v", err)
}
rs.Client = client
rs.EventClient = eventClient
rs.DeschedulingInterval = 0
errChan := make(chan error, 1)
defer close(errChan)
go func() {
err := RunDeschedulerStrategies(ctx, rs, dp, "v1")
errChan <- err
}()
cancel()
select {
case err := <-errChan:
if err != nil {
t.Fatalf("Unable to run descheduler strategies: %v", err)
}
case <-time.After(1 * time.Second):
t.Fatal("Root ctx should have canceled immediately")
}
}
func podEvictionReactionFuc(evictedPods *[]string) func(action core.Action) (bool, runtime.Object, error) {
return func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() == "eviction" {
createAct, matched := action.(core.CreateActionImpl)
if !matched {
return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
}
if eviction, matched := createAct.Object.(*policy.Eviction); matched {
*evictedPods = append(*evictedPods, eviction.GetName())
if err := wait.PollImmediate(100*time.Millisecond, time.Second, func() (bool, error) {
// Get over evicted pod result in panic
//pods, err := client.CoreV1().Pods(p1.Namespace).Get(p1.Name, metav1.GetOptions{})
// List is better, it does not panic.
// Though once the pod is evicted, List starts to error with "can't assign or convert v1beta1.Eviction into v1.Pod"
pods, err := client.CoreV1().Pods(p1.Namespace).List(ctx, metav1.ListOptions{})
if err == nil {
if len(pods.Items) > 0 {
return false, nil
}
return true, nil
}
return false, nil, nil // fallback to the default reactor
if strings.Contains(err.Error(), "can't assign or convert v1beta1.Eviction into v1.Pod") {
return true, nil
}
return false, nil
}); err != nil {
t.Fatalf("Unable to evict pod, node taint did not get propagated to descheduler strategies")
}
}

View File

@@ -19,158 +19,146 @@ package evictions
import (
"context"
"fmt"
"strings"
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1"
policy "k8s.io/api/policy/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/errors"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/events"
"k8s.io/client-go/kubernetes/scheme"
clientcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/metrics"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
)
const (
evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict"
)
// nodePodEvictedCount keeps count of pods evicted on node
type nodePodEvictedCount map[string]uint
type namespacePodEvictCount map[string]uint
type nodePodEvictedCount map[*v1.Node]int
type PodEvictor struct {
client clientset.Interface
nodes []*v1.Node
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
nodepodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
metricsEnabled bool
eventRecorder events.EventRecorder
client clientset.Interface
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode int
nodepodCount nodePodEvictedCount
evictLocalStoragePods bool
}
func NewPodEvictor(
client clientset.Interface,
policyGroupVersion string,
dryRun bool,
maxPodsToEvictPerNode *uint,
maxPodsToEvictPerNamespace *uint,
maxPodsToEvictPerNode int,
nodes []*v1.Node,
metricsEnabled bool,
eventRecorder events.EventRecorder,
evictLocalStoragePods bool,
) *PodEvictor {
var nodePodCount = make(nodePodEvictedCount)
var namespacePodCount = make(namespacePodEvictCount)
for _, node := range nodes {
// Initialize podsEvicted till now with 0.
nodePodCount[node.Name] = 0
nodePodCount[node] = 0
}
return &PodEvictor{
client: client,
nodes: nodes,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
nodepodCount: nodePodCount,
namespacePodCount: namespacePodCount,
metricsEnabled: metricsEnabled,
eventRecorder: eventRecorder,
client: client,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
nodepodCount: nodePodCount,
evictLocalStoragePods: evictLocalStoragePods,
}
}
// IsEvictable checks if a pod is evictable or not.
func (pe *PodEvictor) IsEvictable(pod *v1.Pod) bool {
checkErrs := []error{}
if IsCriticalPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is critical"))
}
ownerRefList := podutil.OwnerRef(pod)
if IsDaemonsetPod(ownerRefList) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
}
if len(ownerRefList) == 0 {
checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs"))
}
if !pe.evictLocalStoragePods && IsPodWithLocalStorage(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod has local storage and descheduler is not configured with --evict-local-storage-pods"))
}
if IsMirrorPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
}
if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) {
klog.V(4).Infof("Pod %s in namespace %s is not evictable: Pod lacks an eviction annotation and fails the following checks: %v", pod.Name, pod.Namespace, errors.NewAggregate(checkErrs).Error())
return false
}
return true
}
// NodeEvicted gives a number of pods evicted for node
func (pe *PodEvictor) NodeEvicted(node *v1.Node) uint {
return pe.nodepodCount[node.Name]
func (pe *PodEvictor) NodeEvicted(node *v1.Node) int {
return pe.nodepodCount[node]
}
// TotalEvicted gives a number of pods evicted through all nodes
func (pe *PodEvictor) TotalEvicted() uint {
var total uint
func (pe *PodEvictor) TotalEvicted() int {
var total int
for _, count := range pe.nodepodCount {
total += count
}
return total
}
// NodeLimitExceeded checks if the number of evictions for a node was exceeded
func (pe *PodEvictor) NodeLimitExceeded(node *v1.Node) bool {
if pe.maxPodsToEvictPerNode != nil {
return pe.nodepodCount[node.Name] == *pe.maxPodsToEvictPerNode
// EvictPod returns non-nil error only when evicting a pod on a node is not
// possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod
// is evicted on the server side.
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node, reasons ...string) (bool, error) {
var reason string
if len(reasons) > 0 {
reason = " (" + strings.Join(reasons, ", ") + ")"
}
return false
}
// EvictOptions provides a handle for passing additional info to EvictPod
type EvictOptions struct {
// Reason allows for passing details about the specific eviction for logging.
Reason string
}
// EvictPod evicts a pod while exercising eviction limits.
// Returns true when the pod is evicted on the server side.
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, opts EvictOptions) bool {
// TODO: Replace context-propagated Strategy name with a defined framework handle for accessing Strategy info
strategy := ""
if ctx.Value("strategyName") != nil {
strategy = ctx.Value("strategyName").(string)
if pe.maxPodsToEvictPerNode > 0 && pe.nodepodCount[node]+1 > pe.maxPodsToEvictPerNode {
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", pe.maxPodsToEvictPerNode, node.Name)
}
if pod.Spec.NodeName != "" {
if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[pod.Spec.NodeName]+1 > *pe.maxPodsToEvictPerNode {
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
}
klog.ErrorS(fmt.Errorf("Maximum number of evicted pods per node reached"), "limit", *pe.maxPodsToEvictPerNode, "node", pod.Spec.NodeName)
return false
}
}
if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace {
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
}
klog.ErrorS(fmt.Errorf("Maximum number of evicted pods per namespace reached"), "limit", *pe.maxPodsToEvictPerNamespace, "namespace", pod.Namespace)
return false
}
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion, pe.dryRun)
if err != nil {
// err is used only for logging purposes
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", opts.Reason)
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
}
return false
}
if pod.Spec.NodeName != "" {
pe.nodepodCount[pod.Spec.NodeName]++
}
pe.namespacePodCount[pod.Namespace]++
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace, "node": pod.Spec.NodeName}).Inc()
klog.Errorf("Error evicting pod: %#v in namespace %#v%s: %#v", pod.Name, pod.Namespace, reason, err)
return false, nil
}
pe.nodepodCount[node]++
if pe.dryRun {
klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", strategy, "node", pod.Spec.NodeName)
klog.V(1).Infof("Evicted pod in dry run mode: %#v in namespace %#v%s", pod.Name, pod.Namespace, reason)
} else {
klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", opts.Reason, "strategy", strategy, "node", pod.Spec.NodeName)
reason := opts.Reason
if len(reason) == 0 {
reason = strategy
if len(reason) == 0 {
reason = "NotSet"
}
}
pe.eventRecorder.Eventf(pod, nil, v1.EventTypeNormal, reason, "Descheduled", "pod evicted by sigs.k8s.io/descheduler")
klog.V(1).Infof("Evicted pod: %#v in namespace %#v%s", pod.Name, pod.Namespace, reason)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(klog.V(3).Infof)
eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: pe.client.CoreV1().Events(pod.Namespace)})
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason))
}
return true
return true, nil
}
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string, dryRun bool) error {
if dryRun {
return nil
}
deleteOptions := &metav1.DeleteOptions{}
// GracePeriodSeconds ?
eviction := &policy.Eviction{
@@ -184,7 +172,7 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
},
DeleteOptions: deleteOptions,
}
err := client.PolicyV1().Evictions(eviction.Namespace).Evict(ctx, eviction)
err := client.PolicyV1beta1().Evictions(eviction.Namespace).Evict(ctx, eviction)
if apierrors.IsTooManyRequests(err) {
return fmt.Errorf("error when evicting pod (ignoring) %q: %v", pod.Name, err)
@@ -194,3 +182,37 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
}
return err
}
func IsCriticalPod(pod *v1.Pod) bool {
return utils.IsCriticalPod(pod)
}
func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool {
for _, ownerRef := range ownerRefList {
if ownerRef.Kind == "DaemonSet" {
return true
}
}
return false
}
// IsMirrorPod checks whether the pod is a mirror pod.
func IsMirrorPod(pod *v1.Pod) bool {
return utils.IsMirrorPod(pod)
}
// HaveEvictAnnotation checks if the pod have evict annotation
func HaveEvictAnnotation(pod *v1.Pod) bool {
_, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey]
return found
}
func IsPodWithLocalStorage(pod *v1.Pod) bool {
for _, volume := range pod.Spec.Volumes {
if volume.HostPath != nil || volume.EmptyDir != nil {
return true
}
}
return false
}

View File

@@ -62,13 +62,169 @@ func TestEvictPod(t *testing.T) {
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: test.pods}, nil
})
got := evictPod(ctx, fakeClient, test.pod, "v1")
got := evictPod(ctx, fakeClient, test.pod, "v1", false)
if got != test.want {
t.Errorf("Test error for Desc: %s. Expected %v pod eviction to be %v, got %v", test.description, test.pod.Name, test.want, got)
}
}
}
func TestIsEvictable(t *testing.T) {
n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil)
type testCase struct {
pod *v1.Pod
runBefore func(*v1.Pod)
evictLocalStoragePods bool
result bool
}
testCases := []testCase{
{
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
},
evictLocalStoragePods: false,
result: false,
}, {
pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
},
evictLocalStoragePods: true,
result: true,
}, {
pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
},
evictLocalStoragePods: false,
result: false,
}, {
pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = test.GetMirrorPodAnnotation()
},
evictLocalStoragePods: false,
result: false,
}, {
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = test.GetMirrorPodAnnotation()
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
evictLocalStoragePods: false,
result: false,
}, {
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/evict": "true",
}
},
evictLocalStoragePods: false,
result: true,
},
}
for _, test := range testCases {
test.runBefore(test.pod)
podEvictor := &PodEvictor{
evictLocalStoragePods: test.evictLocalStoragePods,
}
result := podEvictor.IsEvictable(test.pod)
if result != test.result {
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
}
}
}
func TestPodTypes(t *testing.T) {
n1 := test.BuildTestNode("node1", 1000, 2000, 9, nil)
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, nil)
@@ -77,6 +233,10 @@ func TestPodTypes(t *testing.T) {
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil)
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil)
p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil)
p5 := test.BuildTestPod("p5", 400, 0, n1.Name, nil)
p6 := test.BuildTestPod("p6", 400, 0, n1.Name, nil)
p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
// The following 4 pods won't get evicted.
@@ -97,18 +257,27 @@ func TestPodTypes(t *testing.T) {
}
// A Mirror Pod.
p4.Annotations = test.GetMirrorPodAnnotation()
if !utils.IsMirrorPod(p4) {
// A Critical Pod.
p5.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
p5.Spec.Priority = &priority
systemCriticalPriority := utils.SystemCriticalPriority
p5.Spec.Priority = &systemCriticalPriority
if !IsMirrorPod(p4) {
t.Errorf("Expected p4 to be a mirror pod.")
}
if !utils.IsPodWithLocalStorage(p3) {
if !IsCriticalPod(p5) {
t.Errorf("Expected p5 to be a critical pod.")
}
if !IsPodWithLocalStorage(p3) {
t.Errorf("Expected p3 to be a pod with local storage.")
}
ownerRefList := podutil.OwnerRef(p2)
if !utils.IsDaemonsetPod(ownerRefList) {
if !IsDaemonsetPod(ownerRefList) {
t.Errorf("Expected p2 to be a daemonset pod.")
}
ownerRefList = podutil.OwnerRef(p1)
if utils.IsDaemonsetPod(ownerRefList) || utils.IsPodWithLocalStorage(p1) || utils.IsCriticalPriorityPod(p1) || utils.IsMirrorPod(p1) || utils.IsStaticPod(p1) {
if IsDaemonsetPod(ownerRefList) || IsPodWithLocalStorage(p1) || IsCriticalPod(p1) || IsMirrorPod(p1) {
t.Errorf("Expected p1 to be a normal pod.")
}

View File

@@ -1,99 +0,0 @@
/*
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package descheduler
import (
"context"
"fmt"
"k8s.io/apimachinery/pkg/util/uuid"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/leaderelection"
"k8s.io/client-go/tools/leaderelection/resourcelock"
componentbaseconfig "k8s.io/component-base/config"
"k8s.io/klog/v2"
"os"
)
// NewLeaderElection starts the leader election code loop
func NewLeaderElection(
run func() error,
client clientset.Interface,
LeaderElectionConfig *componentbaseconfig.LeaderElectionConfiguration,
ctx context.Context,
) error {
var id string
if hostname, err := os.Hostname(); err != nil {
// on errors, make sure we're unique
id = string(uuid.NewUUID())
} else {
// add a uniquifier so that two processes on the same host don't accidentally both become active
id = hostname + "_" + string(uuid.NewUUID())
}
klog.V(3).Infof("Assigned unique lease holder id: %s", id)
if len(LeaderElectionConfig.ResourceNamespace) == 0 {
return fmt.Errorf("namespace may not be empty")
}
if len(LeaderElectionConfig.ResourceName) == 0 {
return fmt.Errorf("name may not be empty")
}
lock, err := resourcelock.New(
LeaderElectionConfig.ResourceLock,
LeaderElectionConfig.ResourceNamespace,
LeaderElectionConfig.ResourceName,
client.CoreV1(),
client.CoordinationV1(),
resourcelock.ResourceLockConfig{
Identity: id,
},
)
if err != nil {
return fmt.Errorf("unable to create leader election lock: %v", err)
}
leaderelection.RunOrDie(ctx, leaderelection.LeaderElectionConfig{
Lock: lock,
ReleaseOnCancel: true,
LeaseDuration: LeaderElectionConfig.LeaseDuration.Duration,
RenewDeadline: LeaderElectionConfig.RenewDeadline.Duration,
RetryPeriod: LeaderElectionConfig.RetryPeriod.Duration,
Callbacks: leaderelection.LeaderCallbacks{
OnStartedLeading: func(ctx context.Context) {
klog.V(1).InfoS("Started leading")
err := run()
if err != nil {
klog.Error(err)
}
},
OnStoppedLeading: func() {
klog.V(1).InfoS("Leader lost")
},
OnNewLeader: func(identity string) {
// Just got the lock
if identity == id {
return
}
klog.V(1).Infof("New leader elected: %v", identity)
},
},
})
return nil
}

View File

@@ -18,22 +18,18 @@ package node
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
// ReadyNodes returns ready nodes irrespective of whether they are
// schedulable or not.
func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeSelector string) ([]*v1.Node, error) {
func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeSelector string, stopChannel <-chan struct{}) ([]*v1.Node, error) {
ns, err := labels.Parse(nodeSelector)
if err != nil {
return []*v1.Node{}, err
@@ -46,7 +42,7 @@ func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer co
}
if len(nodes) == 0 {
klog.V(2).InfoS("Node lister returned empty list, now fetch directly")
klog.V(2).Infof("node lister returned empty list, now fetch directly")
nItems, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: nodeSelector})
if err != nil {
@@ -81,210 +77,63 @@ func IsReady(node *v1.Node) bool {
// - NodeOutOfDisk condition status is ConditionFalse,
// - NodeNetworkUnavailable condition status is ConditionFalse.
if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
klog.V(1).InfoS("Ignoring node", "node", klog.KObj(node), "condition", cond.Type, "status", cond.Status)
klog.V(1).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
} /*else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
klog.V(4).InfoS("Ignoring node with condition status", "node", klog.KObj(node.Name), "condition", cond.Type, "status", cond.Status)
klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
klog.V(4).InfoS("Ignoring node with condition status", "node", klog.KObj(node.Name), "condition", cond.Type, "status", cond.Status)
klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
return false
}*/
}
// Ignore nodes that are marked unschedulable
/*if node.Spec.Unschedulable {
klog.V(4).InfoS("Ignoring node since it is unschedulable", "node", klog.KObj(node.Name))
klog.V(4).Infof("Ignoring node %v since it is unschedulable", node.Name)
return false
}*/
return true
}
// NodeFit returns true if the provided pod can be scheduled onto the provided node.
// This function is used when the NodeFit pod filtering feature of the Descheduler is enabled.
// This function currently considers a subset of the Kubernetes Scheduler's predicates when
// deciding if a pod would fit on a node, but more predicates may be added in the future.
func NodeFit(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) []error {
// Check node selector and required affinity
var errors []error
if ok, err := utils.PodMatchNodeSelector(pod, node); err != nil {
errors = append(errors, err)
} else if !ok {
errors = append(errors, fmt.Errorf("pod node selector does not match the node label"))
}
// Check taints (we only care about NoSchedule and NoExecute taints)
ok := utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
})
if !ok {
errors = append(errors, fmt.Errorf("pod does not tolerate taints on the node"))
}
// Check if the pod can fit on a node based off it's requests
if pod.Spec.NodeName == "" || pod.Spec.NodeName != node.Name {
ok, reqErrors := fitsRequest(nodeIndexer, pod, node)
if !ok {
errors = append(errors, reqErrors...)
}
}
// Check if node is schedulable
if IsNodeUnschedulable(node) {
errors = append(errors, fmt.Errorf("node is not schedulable"))
}
return errors
}
// PodFitsAnyOtherNode checks if the given pod will fit any of the given nodes, besides the node
// the pod is already running on. The predicates used to determine if the pod will fit can be found in the NodeFit function.
func PodFitsAnyOtherNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
// Skip node pod is already on
if node.Name == pod.Spec.NodeName {
continue
}
errors := NodeFit(nodeIndexer, pod, node)
if len(errors) == 0 {
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
} else {
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
for _, err := range errors {
klog.V(4).InfoS(err.Error())
}
}
}
return false
}
// PodFitsAnyNode checks if the given pod will fit any of the given nodes. The predicates used
// to determine if the pod will fit can be found in the NodeFit function.
func PodFitsAnyNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
errors := NodeFit(nodeIndexer, pod, node)
if len(errors) == 0 {
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
} else {
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
for _, err := range errors {
klog.V(4).InfoS(err.Error())
}
}
}
return false
}
// PodFitsCurrentNode checks if the given pod will fit onto the given node. The predicates used
// to determine if the pod will fit can be found in the NodeFit function.
func PodFitsCurrentNode(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) bool {
errors := NodeFit(nodeIndexer, pod, node)
if len(errors) == 0 {
klog.V(4).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
} else {
klog.V(4).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
for _, err := range errors {
klog.V(4).InfoS(err.Error())
}
}
return false
}
// IsNodeUnschedulable checks if the node is unschedulable. This is a helper function to check only in case of
// IsNodeUnschedulable checks if the node is unschedulable. This is helper function to check only in case of
// underutilized node so that they won't be accounted for.
func IsNodeUnschedulable(node *v1.Node) bool {
return node.Spec.Unschedulable
}
// fitsRequest determines if a pod can fit on a node based on its resource requests. It returns true if
// the pod will fit.
func fitsRequest(nodeIndexer podutil.GetPodsAssignedToNodeFunc, pod *v1.Pod, node *v1.Node) (bool, []error) {
var insufficientResources []error
// PodFitsAnyNode checks if the given pod fits any of the given nodes, based on
// multiple criteria, like, pod node selector matching the node label, node
// being schedulable or not.
func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
// Get pod requests
podRequests, _ := utils.PodRequestsAndLimits(pod)
resourceNames := make([]v1.ResourceName, 0, len(podRequests))
for name := range podRequests {
resourceNames = append(resourceNames, name)
ok, err := utils.PodMatchNodeSelector(pod, node)
if err != nil || !ok {
continue
}
if !IsNodeUnschedulable(node) {
klog.V(2).Infof("Pod %v can possibly be scheduled on %v", pod.Name, node.Name)
return true
}
}
return false
}
// PodFitsCurrentNode checks if the given pod fits on the given node if the pod
// node selector matches the node label.
func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool {
ok, err := utils.PodMatchNodeSelector(pod, node)
availableResources, err := nodeAvailableResources(nodeIndexer, node, resourceNames)
if err != nil {
return false, []error{err}
}
podFitsOnNode := true
for _, resource := range resourceNames {
podResourceRequest := podRequests[resource]
availableResource, ok := availableResources[resource]
if !ok || podResourceRequest.MilliValue() > availableResource.MilliValue() {
insufficientResources = append(insufficientResources, fmt.Errorf("insufficient %v", resource))
podFitsOnNode = false
}
}
return podFitsOnNode, insufficientResources
}
// nodeAvailableResources returns resources mapped to the quanitity available on the node.
func nodeAvailableResources(nodeIndexer podutil.GetPodsAssignedToNodeFunc, node *v1.Node, resourceNames []v1.ResourceName) (map[v1.ResourceName]*resource.Quantity, error) {
podsOnNode, err := podutil.ListPodsOnANode(node.Name, nodeIndexer, nil)
if err != nil {
return nil, err
}
nodeUtilization := NodeUtilization(podsOnNode, resourceNames)
remainingResources := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(node.Status.Allocatable.Cpu().MilliValue()-nodeUtilization[v1.ResourceCPU].MilliValue(), resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(node.Status.Allocatable.Memory().Value()-nodeUtilization[v1.ResourceMemory].Value(), resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(node.Status.Allocatable.Pods().Value()-nodeUtilization[v1.ResourcePods].Value(), resource.DecimalSI),
}
for _, name := range resourceNames {
if !IsBasicResource(name) {
if _, exists := node.Status.Allocatable[name]; exists {
allocatableResource := node.Status.Allocatable[name]
remainingResources[name] = resource.NewQuantity(allocatableResource.Value()-nodeUtilization[name].Value(), resource.DecimalSI)
} else {
remainingResources[name] = resource.NewQuantity(0, resource.DecimalSI)
}
}
}
return remainingResources, nil
}
// NodeUtilization returns the resources requested by the given pods. Only resources supplied in the resourceNames parameter are calculated.
func NodeUtilization(pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
}
for _, name := range resourceNames {
if !IsBasicResource(name) {
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
}
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for _, name := range resourceNames {
quantity, ok := req[name]
if ok && name != v1.ResourcePods {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
return totalReqs
}
// IsBasicResource checks if resource is basic native.
func IsBasicResource(name v1.ResourceName) bool {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
return true
default:
klog.Error(err)
return false
}
if !ok {
klog.V(2).Infof("Pod %v does not fit on node %v", pod.Name, node.Name)
return false
}
klog.V(2).Infof("Pod %v fits on node %v", pod.Name, node.Name)
return true
}

View File

@@ -20,13 +20,10 @@ import (
"context"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/test"
)
@@ -72,12 +69,12 @@ func TestReadyNodesWithNodeSelector(t *testing.T) {
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
nodeInformer := sharedInformerFactory.Core().V1().Nodes()
stopChannel := make(chan struct{})
stopChannel := make(chan struct{}, 0)
sharedInformerFactory.Start(stopChannel)
sharedInformerFactory.WaitForCacheSync(stopChannel)
defer close(stopChannel)
nodes, _ := ReadyNodes(ctx, fakeClient, nodeInformer, nodeSelector)
nodes, _ := ReadyNodes(ctx, fakeClient, nodeInformer, nodeSelector, nil)
if nodes[0].Name != "node1" {
t.Errorf("Expected node1, got %s", nodes[0].Name)
@@ -150,13 +147,13 @@ func TestPodFitsCurrentNode(t *testing.T) {
},
},
},
node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
success: true,
},
{
@@ -184,582 +181,217 @@ func TestPodFitsCurrentNode(t *testing.T) {
},
},
},
node: test.BuildTestNode("node1", 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
node: &v1.Node{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
success: false,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
objs = append(objs, tc.node)
objs = append(objs, tc.pod)
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
actual := PodFitsCurrentNode(getPodsAssignedToNode, tc.pod, tc.node)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
})
actual := PodFitsCurrentNode(tc.pod, tc.node)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
}
}
func TestPodFitsAnyOtherNode(t *testing.T) {
func TestPodFitsAnyNode(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
nodeTaintKey := "hardware"
nodeTaintValue := "gpu"
// Staging node has no scheduling restrictions, but the pod always starts here and PodFitsAnyOtherNode() doesn't take into account the node the pod is running on.
nodeNames := []string{"node1", "node2", "stagingNode"}
tests := []struct {
description string
pod *v1.Pod
nodes []*v1.Node
success bool
podsOnNodes []*v1.Pod
}{
{
description: "Pod fits another node matching node affinity",
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
description: "Pod expected to fit one of the nodes",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
podsOnNodes: []*v1.Pod{},
success: true,
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
},
success: true,
},
{
description: "Pod expected to fit one of the nodes",
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
description: "Pod expected to fit one of the nodes (error node first)",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
podsOnNodes: []*v1.Pod{},
success: true,
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
},
success: true,
},
{
description: "Pod expected to fit none of the nodes",
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "unfit1",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "unfit2",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
podsOnNodes: []*v1.Pod{},
success: false,
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "unfit1",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
nodeLabelKey: "unfit2",
},
},
},
},
success: false,
},
{
description: "Nodes are unschedulable but labels match, should fail",
pod: test.BuildTestPod("p1", 0, 0, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Unschedulable = true
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: "no",
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{},
success: false,
},
{
description: "Both nodes are tained, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
}
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
},
},
podsOnNodes: []*v1.Pod{},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, there is a pod on the available node, and requests are low, should pass",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
test.BuildTestNode(nodeNames[1], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("test-pod", 12*1000, 20*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"test": "true",
nodeLabelKey: nodeLabelValue,
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(40*1000*1000*1000, resource.DecimalSI)
}),
},
success: true,
},
{
description: "Two nodes matches node selector, one of them is tained, but CPU requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node only has 4 cores, the pod already on the node below requests 3 cores, and the pod above requests 2 cores
test.BuildTestNode(nodeNames[1], 4000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 3000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
},
Spec: v1.NodeSpec{
Unschedulable: true,
},
},
{
ObjectMeta: metav1.ObjectMeta{
Labels: map[string]string{
"test": "true",
nodeLabelKey: "no",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, but memory requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 5*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node only has 8GB of memory, the pod already on the node below requests 4GB, and the pod above requests 5GB
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("4GB-mem-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, but ephemeral storage requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 4*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node only has 20GB of storage, the pod already on the node below requests 11GB, and the pod above requests 10GB
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(20*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("11GB-storage-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(11*1000*1000*1000, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, but custom resource requests are too big, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
}),
// Notice that this node only has 15 of the custom resource, the pod already on the node below requests 10, and the pod above requests 10
test.BuildTestNode(nodeNames[1], 10*1000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
node.Status.Allocatable["example.com/custom-resource"] = *resource.NewQuantity(15, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("10-custom-resource-pod", 0, 0, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests["example.com/custom-resource"] = *resource.NewQuantity(10, resource.DecimalSI)
}),
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, and pod Overhead is low enough, should pass",
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node has 5 CPU cores, the pod below requests 2 cores, and has CPU overhead of 1 cores, and the pod above requests 1 core
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Overhead = createResourceList(1000, 1000*1000*1000, 1000*1000*1000)
}),
},
success: true,
},
{
description: "Two nodes matches node selector, one of them is tained, CPU requests will fit, but pod Overhead is too high, should fail",
pod: test.BuildTestPod("p1", 2000, 2*1000*1000*1000, nodeNames[2], func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
}),
nodes: []*v1.Node{
test.BuildTestNode(nodeNames[0], 64000, 128*1000*1000*1000, 200, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(1000*1000*1000*1000, resource.DecimalSI)
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
// Notice that this node only has 5 CPU cores, the pod below requests 2 cores, but has CPU overhead of 2 cores, and the pod above requests 2 cores
test.BuildTestNode(nodeNames[1], 5000, 8*1000*1000*1000, 12, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
node.Status.Allocatable[v1.ResourceEphemeralStorage] = *resource.NewQuantity(200*1000*1000*1000, resource.DecimalSI)
}),
test.BuildTestNode(nodeNames[2], 0, 0, 0, nil),
},
podsOnNodes: []*v1.Pod{
test.BuildTestPod("3-core-pod", 2000, 4*1000*1000*1000, nodeNames[1], func(pod *v1.Pod) {
pod.ObjectMeta = metav1.ObjectMeta{
Namespace: "test",
Labels: map[string]string{
"test": "true",
},
}
pod.Spec.Containers[0].Resources.Requests[v1.ResourceEphemeralStorage] = *resource.NewQuantity(10*1000*1000*1000, resource.DecimalSI)
pod.Spec.Overhead = createResourceList(2000, 1000*1000*1000, 1000*1000*1000)
}),
},
},
},
success: false,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range tc.nodes {
objs = append(objs, node)
}
for _, pod := range tc.podsOnNodes {
objs = append(objs, pod)
}
objs = append(objs, tc.pod)
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
actual := PodFitsAnyOtherNode(getPodsAssignedToNode, tc.pod, tc.nodes)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
})
actual := PodFitsAnyNode(tc.pod, tc.nodes)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
}
}
// createResourceList builds a small resource list of core resources
func createResourceList(cpu int64, memory int64, ephemeralStorage int64) v1.ResourceList {
resourceList := make(map[v1.ResourceName]resource.Quantity)
resourceList[v1.ResourceCPU] = *resource.NewMilliQuantity(cpu, resource.DecimalSI)
resourceList[v1.ResourceMemory] = *resource.NewQuantity(memory, resource.DecimalSI)
resourceList[v1.ResourceEphemeralStorage] = *resource.NewQuantity(ephemeralStorage, resource.DecimalSI)
return resourceList
}

View File

@@ -17,174 +17,39 @@ limitations under the License.
package pod
import (
"sort"
"context"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
coreinformers "k8s.io/client-go/informers/core/v1"
"k8s.io/client-go/tools/cache"
"k8s.io/apimachinery/pkg/fields"
clientset "k8s.io/client-go/kubernetes"
"sigs.k8s.io/descheduler/pkg/utils"
"sort"
)
const (
nodeNameKeyIndex = "spec.nodeName"
)
// FilterFunc is a filter for a pod.
type FilterFunc func(*v1.Pod) bool
// GetPodsAssignedToNodeFunc is a function which accept a node name and a pod filter function
// as input and returns the pods that assigned to the node.
type GetPodsAssignedToNodeFunc func(string, FilterFunc) ([]*v1.Pod, error)
// WrapFilterFuncs wraps a set of FilterFunc in one.
func WrapFilterFuncs(filters ...FilterFunc) FilterFunc {
return func(pod *v1.Pod) bool {
for _, filter := range filters {
if filter != nil && !filter(pod) {
return false
}
}
return true
}
}
type Options struct {
filter FilterFunc
includedNamespaces sets.String
excludedNamespaces sets.String
labelSelector *metav1.LabelSelector
}
// NewOptions returns an empty Options.
func NewOptions() *Options {
return &Options{}
}
// WithFilter sets a pod filter.
// ListPodsOnANode lists all of the pods on a node
// It also accepts an optional "filter" function which can be used to further limit the pods that are returned.
// (Usually this is podEvictor.IsEvictable, in order to only list the evictable pods on a node, but can
// be used by strategies to extend IsEvictable if there are further restrictions, such as with NodeAffinity).
// The filter function should return true if the pod should be returned from ListPodsOnANode
func (o *Options) WithFilter(filter FilterFunc) *Options {
o.filter = filter
return o
}
// WithNamespaces sets included namespaces
func (o *Options) WithNamespaces(namespaces sets.String) *Options {
o.includedNamespaces = namespaces
return o
}
// WithoutNamespaces sets excluded namespaces
func (o *Options) WithoutNamespaces(namespaces sets.String) *Options {
o.excludedNamespaces = namespaces
return o
}
// WithLabelSelector sets a pod label selector
func (o *Options) WithLabelSelector(labelSelector *metav1.LabelSelector) *Options {
o.labelSelector = labelSelector
return o
}
// BuildFilterFunc builds a final FilterFunc based on Options.
func (o *Options) BuildFilterFunc() (FilterFunc, error) {
var s labels.Selector
var err error
if o.labelSelector != nil {
s, err = metav1.LabelSelectorAsSelector(o.labelSelector)
if err != nil {
return nil, err
}
}
return func(pod *v1.Pod) bool {
if o.filter != nil && !o.filter(pod) {
return false
}
if len(o.includedNamespaces) > 0 && !o.includedNamespaces.Has(pod.Namespace) {
return false
}
if len(o.excludedNamespaces) > 0 && o.excludedNamespaces.Has(pod.Namespace) {
return false
}
if s != nil && !s.Matches(labels.Set(pod.GetLabels())) {
return false
}
return true
}, nil
}
// BuildGetPodsAssignedToNodeFunc establishes an indexer to map the pods and their assigned nodes.
// It returns a function to help us get all the pods that assigned to a node based on the indexer.
func BuildGetPodsAssignedToNodeFunc(podInformer coreinformers.PodInformer) (GetPodsAssignedToNodeFunc, error) {
// Establish an indexer to map the pods and their assigned nodes.
err := podInformer.Informer().AddIndexers(cache.Indexers{
nodeNameKeyIndex: func(obj interface{}) ([]string, error) {
pod, ok := obj.(*v1.Pod)
if !ok {
return []string{}, nil
}
if len(pod.Spec.NodeName) == 0 {
return []string{}, nil
}
return []string{pod.Spec.NodeName}, nil
},
})
if err != nil {
return nil, err
}
// The indexer helps us get all the pods that assigned to a node.
podIndexer := podInformer.Informer().GetIndexer()
getPodsAssignedToNode := func(nodeName string, filter FilterFunc) ([]*v1.Pod, error) {
objs, err := podIndexer.ByIndex(nodeNameKeyIndex, nodeName)
if err != nil {
return nil, err
}
pods := make([]*v1.Pod, 0, len(objs))
for _, obj := range objs {
pod, ok := obj.(*v1.Pod)
if !ok {
continue
}
if filter(pod) {
pods = append(pods, pod)
}
}
return pods, nil
}
return getPodsAssignedToNode, nil
}
// ListPodsOnANode lists all pods on a node.
// It also accepts a "filter" function which can be used to further limit the pods that are returned.
// (Usually this is podEvictor.Evictable().IsEvictable, in order to only list the evictable pods on a node, but can
// be used by strategies to extend it if there are further restrictions, such as with NodeAffinity).
func ListPodsOnANode(
nodeName string,
getPodsAssignedToNode GetPodsAssignedToNodeFunc,
filter FilterFunc,
) ([]*v1.Pod, error) {
// Succeeded and failed pods are not considered because they don't occupy any resource.
f := func(pod *v1.Pod) bool {
return pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed
}
return ListAllPodsOnANode(nodeName, getPodsAssignedToNode, WrapFilterFuncs(f, filter))
}
// ListAllPodsOnANode lists all the pods on a node no matter what the phase of the pod is.
func ListAllPodsOnANode(
nodeName string,
getPodsAssignedToNode GetPodsAssignedToNodeFunc,
filter FilterFunc,
) ([]*v1.Pod, error) {
pods, err := getPodsAssignedToNode(nodeName, filter)
func ListPodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node, filter func(pod *v1.Pod) bool) ([]*v1.Pod, error) {
fieldSelector, err := fields.ParseSelector("spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed))
if err != nil {
return []*v1.Pod{}, err
}
podList, err := client.CoreV1().Pods(v1.NamespaceAll).List(ctx,
metav1.ListOptions{FieldSelector: fieldSelector.String()})
if err != nil {
return []*v1.Pod{}, err
}
pods := make([]*v1.Pod, 0)
for i := range podList.Items {
if filter != nil && !filter(&podList.Items[i]) {
continue
}
pods = append(pods, &podList.Items[i])
}
return pods, nil
}
@@ -228,10 +93,3 @@ func SortPodsBasedOnPriorityLowToHigh(pods []*v1.Pod) {
return *pods[i].Spec.Priority < *pods[j].Spec.Priority
})
}
// SortPodsBasedOnAge sorts Pods from oldest to most recent in place
func SortPodsBasedOnAge(pods []*v1.Pod) {
sort.Slice(pods, func(i, j int) bool {
return pods[i].CreationTimestamp.Before(&pods[j].CreationTimestamp)
})
}

View File

@@ -20,15 +20,13 @@ import (
"context"
"fmt"
"reflect"
"strings"
"testing"
"time"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/test"
)
@@ -40,80 +38,39 @@ var (
func TestListPodsOnANode(t *testing.T) {
testCases := []struct {
name string
pods []*v1.Pod
pods map[string][]v1.Pod
node *v1.Node
labelSelector *metav1.LabelSelector
expectedPodCount int
}{
{
name: "test listing pods on a node",
pods: []*v1.Pod{
test.BuildTestPod("pod1", 100, 0, "n1", nil),
test.BuildTestPod("pod2", 100, 0, "n1", nil),
test.BuildTestPod("pod3", 100, 0, "n2", nil),
pods: map[string][]v1.Pod{
"n1": {
*test.BuildTestPod("pod1", 100, 0, "n1", nil),
*test.BuildTestPod("pod2", 100, 0, "n1", nil),
},
"n2": {*test.BuildTestPod("pod3", 100, 0, "n2", nil)},
},
node: test.BuildTestNode("n1", 2000, 3000, 10, nil),
labelSelector: nil,
expectedPodCount: 2,
},
{
name: "test listing pods with label selector",
pods: []*v1.Pod{
test.BuildTestPod("pod1", 100, 0, "n1", nil),
test.BuildTestPod("pod2", 100, 0, "n1", func(pod *v1.Pod) {
pod.Labels = map[string]string{"foo": "bar"}
}),
test.BuildTestPod("pod3", 100, 0, "n1", func(pod *v1.Pod) {
pod.Labels = map[string]string{"foo": "bar1"}
}),
test.BuildTestPod("pod4", 100, 0, "n2", nil),
},
node: test.BuildTestNode("n1", 2000, 3000, 10, nil),
labelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "bar1"},
},
},
},
expectedPodCount: 2,
},
}
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
objs = append(objs, testCase.node)
for _, pod := range testCase.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
filter, err := NewOptions().WithLabelSelector(testCase.labelSelector).BuildFilterFunc()
if err != nil {
t.Errorf("Build filter function error: %v", err)
}
pods, _ := ListPodsOnANode(testCase.node.Name, getPodsAssignedToNode, filter)
if len(pods) != testCase.expectedPodCount {
t.Errorf("Expected %v pods on node %v, got %+v", testCase.expectedPodCount, testCase.node.Name, len(pods))
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldString := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldString, "n1") {
return true, &v1.PodList{Items: testCase.pods["n1"]}, nil
} else if strings.Contains(fieldString, "n2") {
return true, &v1.PodList{Items: testCase.pods["n2"]}, nil
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
pods, _ := ListPodsOnANode(context.TODO(), fakeClient, testCase.node, nil)
if len(pods) != testCase.expectedPodCount {
t.Errorf("expected %v pods on node %v, got %+v", testCase.expectedPodCount, testCase.node.Name, len(pods))
}
}
}
@@ -156,23 +113,3 @@ func TestSortPodsBasedOnPriorityLowToHigh(t *testing.T) {
t.Errorf("Expected last pod in sorted list to be %v which of highest priority and guaranteed but got %v", p4, podList[len(podList)-1])
}
}
func TestSortPodsBasedOnAge(t *testing.T) {
podList := make([]*v1.Pod, 9)
n1 := test.BuildTestNode("n1", 4000, 3000, int64(len(podList)), nil)
for i := 0; i < len(podList); i++ {
podList[i] = test.BuildTestPod(fmt.Sprintf("p%d", i), 1, 32, n1.Name, func(pod *v1.Pod) {
creationTimestamp := metav1.Now().Add(time.Minute * time.Duration(-i))
pod.ObjectMeta.SetCreationTimestamp(metav1.NewTime(creationTimestamp))
})
}
SortPodsBasedOnAge(podList)
for i := 0; i < len(podList)-1; i++ {
if podList[i+1].CreationTimestamp.Before(&podList[i].CreationTimestamp) {
t.Errorf("Expected pods to be sorted by age but pod at index %d was older than %d", i+1, i)
}
}
}

View File

@@ -30,7 +30,7 @@ import (
func LoadPolicyConfig(policyConfigFile string) (*api.DeschedulerPolicy, error) {
if policyConfigFile == "" {
klog.V(1).InfoS("Policy config file not specified")
klog.V(1).Infof("policy config file not specified")
return nil, nil
}

View File

@@ -19,22 +19,9 @@ package scheme
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/api/v1alpha1"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
componentconfigv1alpha1 "sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
)
var (
Scheme = runtime.NewScheme()
Codecs = serializer.NewCodecFactory(Scheme)
)
func init() {
utilruntime.Must(api.AddToScheme(Scheme))
utilruntime.Must(v1alpha1.AddToScheme(Scheme))
utilruntime.Must(componentconfig.AddToScheme(Scheme))
utilruntime.Must(componentconfigv1alpha1.AddToScheme(Scheme))
}

View File

@@ -0,0 +1,127 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"context"
"reflect"
"sort"
"strings"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)
// RemoveDuplicatePods removes the duplicate pods on node. This strategy evicts all duplicate pods on node.
// A pod is said to be a duplicate of other if both of them are from same creator, kind and are within the same
// namespace, and have at least one container with the same image.
// As of now, this strategy won't evict daemonsets, mirror pods, critical pods and pods with local storages.
func RemoveDuplicatePods(
ctx context.Context,
client clientset.Interface,
strategy api.DeschedulerStrategy,
nodes []*v1.Node,
podEvictor *evictions.PodEvictor,
) {
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v", node.Name)
pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable)
if err != nil {
klog.Errorf("error listing evictable pods on node %s: %+v", node.Name, err)
continue
}
duplicatePods := make([]*v1.Pod, 0, len(pods))
// Each pod has a list of owners and a list of containers, and each container has 1 image spec.
// For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string.
// All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness.
// This list of keys representing a single pod is then sorted alphabetically.
// If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons:
// - The 2 pods have the exact same ownerrefs
// - The 2 pods have the exact same container images
//
// duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D-slice of all the other lists where that is the first key
// (Since we sort each pod's list, we only need to key the map on the first entry in each list. Any pod that doesn't have
// the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed).
// If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate.
// If not, then we add this pod's list to the list of lists for that key.
duplicateKeysMap := map[string][][]string{}
for _, pod := range pods {
ownerRefList := podutil.OwnerRef(pod)
if hasExcludedOwnerRefKind(ownerRefList, strategy) {
continue
}
podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
for _, ownerRef := range ownerRefList {
for _, container := range pod.Spec.Containers {
// Namespace/Kind/Name should be unique for the cluster.
// We also consider the image, as 2 pods could have the same owner but serve different purposes
// So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod.
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/")
podContainerKeys = append(podContainerKeys, s)
}
}
sort.Strings(podContainerKeys)
// If there have been any other pods with the same first "key", look through all the lists to see if any match
if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok {
matched := false
for _, keys := range existing {
if reflect.DeepEqual(keys, podContainerKeys) {
matched = true
duplicatePods = append(duplicatePods, pod)
break
}
}
if !matched {
// Found no matches, add this list of keys to the list of lists that have the same first key
duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys)
}
} else {
// This is the first pod we've seen that has this first "key" entry
duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys}
}
}
for _, pod := range duplicatePods {
if _, err := podEvictor.EvictPod(ctx, pod, node, "RemoveDuplicatePods"); err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
break
}
}
}
}
func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, strategy api.DeschedulerStrategy) bool {
if strategy.Params == nil || strategy.Params.RemoveDuplicates == nil {
return false
}
exclude := sets.NewString(strategy.Params.RemoveDuplicates.ExcludeOwnerKinds...)
for _, owner := range ownerRefs {
if exclude.Has(owner.Kind) {
return true
}
}
return false
}

View File

@@ -0,0 +1,212 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"context"
"testing"
"k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
func TestFindDuplicatePods(t *testing.T) {
ctx := context.Background()
// first setup pods
node := test.BuildTestNode("n1", 2000, 3000, 10, nil)
p1 := test.BuildTestPod("p1", 100, 0, node.Name, nil)
p1.Namespace = "dev"
p2 := test.BuildTestPod("p2", 100, 0, node.Name, nil)
p2.Namespace = "dev"
p3 := test.BuildTestPod("p3", 100, 0, node.Name, nil)
p3.Namespace = "dev"
p4 := test.BuildTestPod("p4", 100, 0, node.Name, nil)
p5 := test.BuildTestPod("p5", 100, 0, node.Name, nil)
p6 := test.BuildTestPod("p6", 100, 0, node.Name, nil)
p7 := test.BuildTestPod("p7", 100, 0, node.Name, nil)
p7.Namespace = "kube-system"
p8 := test.BuildTestPod("p8", 100, 0, node.Name, nil)
p8.Namespace = "test"
p9 := test.BuildTestPod("p9", 100, 0, node.Name, nil)
p9.Namespace = "test"
p10 := test.BuildTestPod("p10", 100, 0, node.Name, nil)
p10.Namespace = "test"
p11 := test.BuildTestPod("p11", 100, 0, node.Name, nil)
p11.Namespace = "different-images"
p12 := test.BuildTestPod("p12", 100, 0, node.Name, nil)
p12.Namespace = "different-images"
p13 := test.BuildTestPod("p13", 100, 0, node.Name, nil)
p13.Namespace = "different-images"
p14 := test.BuildTestPod("p14", 100, 0, node.Name, nil)
p14.Namespace = "different-images"
// ### Evictable Pods ###
// Three Pods in the "default" Namespace, bound to same ReplicaSet. 2 should be evicted.
ownerRef1 := test.GetReplicaSetOwnerRefList()
p1.ObjectMeta.OwnerReferences = ownerRef1
p2.ObjectMeta.OwnerReferences = ownerRef1
p3.ObjectMeta.OwnerReferences = ownerRef1
// Three Pods in the "test" Namespace, bound to same ReplicaSet. 2 should be evicted.
ownerRef2 := test.GetReplicaSetOwnerRefList()
p8.ObjectMeta.OwnerReferences = ownerRef2
p9.ObjectMeta.OwnerReferences = ownerRef2
p10.ObjectMeta.OwnerReferences = ownerRef2
// ### Non-evictable Pods ###
// A DaemonSet.
p4.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
// A Pod with local storage.
p5.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p5.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
p6.Annotations = test.GetMirrorPodAnnotation()
// A Critical Pod.
priority := utils.SystemCriticalPriority
p7.Spec.Priority = &priority
// Same owners, but different images
p11.Spec.Containers[0].Image = "foo"
p11.ObjectMeta.OwnerReferences = ownerRef1
p12.Spec.Containers[0].Image = "bar"
p12.ObjectMeta.OwnerReferences = ownerRef1
// Multiple containers
p13.ObjectMeta.OwnerReferences = ownerRef1
p13.Spec.Containers = append(p13.Spec.Containers, v1.Container{
Name: "foo",
Image: "foo",
})
testCases := []struct {
description string
maxPodsToEvictPerNode int
pods []v1.Pod
expectedEvictedPodCount int
strategy api.DeschedulerStrategy
}{
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 2 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3},
expectedEvictedPodCount: 2,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet, but ReplicaSet kind is excluded. 0 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}}}},
},
{
description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 2 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p8, *p9, *p10},
expectedEvictedPodCount: 2,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three Pods in the `dev` Namespace, three Pods in the `test` Namespace. Bound to ReplicaSet with same name. 4 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3, *p8, *p9, *p10},
expectedEvictedPodCount: 4,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods are: part of DaemonSet, with local storage, mirror pod annotation, critical pod annotation - none should be evicted.",
maxPodsToEvictPerNode: 2,
pods: []v1.Pod{*p4, *p5, *p6, *p7},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Test all Pods: 4 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8, *p9, *p10},
expectedEvictedPodCount: 4,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with the same owner but different images should not be evicted",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p11, *p12},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with multiple containers should not match themselves",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p13},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with matching ownerrefs and at not all matching image should not trigger an eviction",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p11, *p13},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
}
for _, testCase := range testCases {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: testCase.pods}, nil
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
return true, node, nil
})
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
testCase.maxPodsToEvictPerNode,
[]*v1.Node{node},
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node}, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
}
}
}

View File

@@ -0,0 +1,408 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"context"
"fmt"
"sort"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
// NodeUsageMap stores a node's info, pods on it and its resource usage
type NodeUsageMap struct {
node *v1.Node
usage api.ResourceThresholds
allPods []*v1.Pod
}
// NodePodsMap is a set of (node, pods) pairs
type NodePodsMap map[*v1.Node][]*v1.Pod
const (
// MinResourcePercentage is the minimum value of a resource's percentage
MinResourcePercentage = 0
// MaxResourcePercentage is the maximum value of a resource's percentage
MaxResourcePercentage = 100
)
// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used
// to calculate nodes' utilization and not the actual resource usage.
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
// todo: move to config validation?
// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
if strategy.Params == nil || strategy.Params.NodeResourceUtilizationThresholds == nil {
klog.V(1).Infof("NodeResourceUtilizationThresholds not set")
return
}
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
klog.Errorf("LowNodeUtilization config is not valid: %v", err)
return
}
// check if Pods/CPU/Mem are set, if not, set them to 100
if _, ok := thresholds[v1.ResourcePods]; !ok {
thresholds[v1.ResourcePods] = MaxResourcePercentage
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceCPU]; !ok {
thresholds[v1.ResourceCPU] = MaxResourcePercentage
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceMemory]; !ok {
thresholds[v1.ResourceMemory] = MaxResourcePercentage
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
}
npm := createNodePodsMap(ctx, client, nodes)
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
klog.V(1).Infof("Criteria for a node under utilization: CPU: %v, Mem: %v, Pods: %v",
thresholds[v1.ResourceCPU], thresholds[v1.ResourceMemory], thresholds[v1.ResourcePods])
if len(lowNodes) == 0 {
klog.V(1).Infof("No node is underutilized, nothing to do here, you might tune your thresholds further")
return
}
klog.V(1).Infof("Total number of underutilized nodes: %v", len(lowNodes))
if len(lowNodes) < strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
klog.V(1).Infof("number of nodes underutilized (%v) is less than NumberOfNodes (%v), nothing to do here", len(lowNodes), strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
return
}
if len(lowNodes) == len(nodes) {
klog.V(1).Infof("all nodes are underutilized, nothing to do here")
return
}
if len(targetNodes) == 0 {
klog.V(1).Infof("all nodes are under target utilization, nothing to do here")
return
}
klog.V(1).Infof("Criteria for a node above target utilization: CPU: %v, Mem: %v, Pods: %v",
targetThresholds[v1.ResourceCPU], targetThresholds[v1.ResourceMemory], targetThresholds[v1.ResourcePods])
klog.V(1).Infof("Total number of nodes above target utilization: %v", len(targetNodes))
evictPodsFromTargetNodes(
ctx,
targetNodes,
lowNodes,
targetThresholds,
podEvictor)
klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
}
// validateStrategyConfig checks if the strategy's config is valid
func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
// validate thresholds and targetThresholds config
if err := validateThresholds(thresholds); err != nil {
return fmt.Errorf("thresholds config is not valid: %v", err)
}
if err := validateThresholds(targetThresholds); err != nil {
return fmt.Errorf("targetThresholds config is not valid: %v", err)
}
// validate if thresholds and targetThresholds have same resources configured
if len(thresholds) != len(targetThresholds) {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
}
for resourceName, value := range thresholds {
if targetValue, ok := targetThresholds[resourceName]; !ok {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
} else if value > targetValue {
return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
}
}
return nil
}
// validateThresholds checks if thresholds have valid resource name and resource percentage configured
func validateThresholds(thresholds api.ResourceThresholds) error {
if thresholds == nil || len(thresholds) == 0 {
return fmt.Errorf("no resource threshold is configured")
}
for name, percent := range thresholds {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
if percent < MinResourcePercentage || percent > MaxResourcePercentage {
return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage)
}
default:
return fmt.Errorf("only cpu, memory, or pods thresholds can be specified")
}
}
return nil
}
// classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
// low and high thresholds, it is simply ignored.
func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThresholds api.ResourceThresholds) ([]NodeUsageMap, []NodeUsageMap) {
lowNodes, targetNodes := []NodeUsageMap{}, []NodeUsageMap{}
for node, pods := range npm {
usage := nodeUtilization(node, pods)
nuMap := NodeUsageMap{
node: node,
usage: usage,
allPods: pods,
}
// Check if node is underutilized and if we can schedule pods on it.
if !nodeutil.IsNodeUnschedulable(node) && isNodeWithLowUtilization(usage, thresholds) {
klog.V(2).Infof("Node %#v is under utilized with usage: %#v", node.Name, usage)
lowNodes = append(lowNodes, nuMap)
} else if isNodeAboveTargetUtilization(usage, targetThresholds) {
klog.V(2).Infof("Node %#v is over utilized with usage: %#v", node.Name, usage)
targetNodes = append(targetNodes, nuMap)
} else {
klog.V(2).Infof("Node %#v is appropriately utilized with usage: %#v", node.Name, usage)
}
}
return lowNodes, targetNodes
}
// evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
// evicts them based on QoS as fallback option.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromTargetNodes(
ctx context.Context,
targetNodes, lowNodes []NodeUsageMap,
targetThresholds api.ResourceThresholds,
podEvictor *evictions.PodEvictor,
) {
sortNodesByUsage(targetNodes)
// upper bound on total number of pods/cpu/memory to be moved
var totalPods, totalCPU, totalMem float64
var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
for _, node := range lowNodes {
taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
nodeCapacity := node.node.Status.Capacity
if len(node.node.Status.Allocatable) > 0 {
nodeCapacity = node.node.Status.Allocatable
}
// totalPods to be moved
podsPercentage := targetThresholds[v1.ResourcePods] - node.usage[v1.ResourcePods]
totalPods += ((float64(podsPercentage) * float64(nodeCapacity.Pods().Value())) / 100)
// totalCPU capacity to be moved
cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
// totalMem capacity to be moved
memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
}
klog.V(1).Infof("Total capacity to be moved: CPU:%v, Mem:%v, Pods:%v", totalCPU, totalMem, totalPods)
klog.V(1).Infof("********Number of pods evicted from each node:***********")
for _, node := range targetNodes {
nodeCapacity := node.node.Status.Capacity
if len(node.node.Status.Allocatable) > 0 {
nodeCapacity = node.node.Status.Allocatable
}
klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
nonRemovablePods, removablePods := classifyPods(node.allPods, podEvictor)
klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, removablePods:%v", len(node.allPods), len(nonRemovablePods), len(removablePods))
if len(removablePods) == 0 {
klog.V(1).Infof("no removable pods on node %#v, try next node", node.node.Name)
continue
}
klog.V(1).Infof("evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
evictPods(ctx, removablePods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
klog.V(1).Infof("%v pods evicted from node %#v with usage %v", podEvictor.NodeEvicted(node.node), node.node.Name, node.usage)
}
}
func evictPods(
ctx context.Context,
inputPods []*v1.Pod,
targetThresholds api.ResourceThresholds,
nodeCapacity v1.ResourceList,
nodeUsage api.ResourceThresholds,
totalPods *float64,
totalCPU *float64,
totalMem *float64,
taintsOfLowNodes map[string][]v1.Taint,
podEvictor *evictions.PodEvictor,
node *v1.Node) {
// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
if isNodeAboveTargetUtilization(nodeUsage, targetThresholds) && *totalPods > 0 && *totalCPU > 0 && *totalMem > 0 {
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
for _, pod := range inputPods {
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).Infof("Skipping eviction for Pod: %#v, doesn't tolerate node taint", pod.Name)
continue
}
cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)
success, err := podEvictor.EvictPod(ctx, pod, node, "LowNodeUtilization")
if err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
break
}
if success {
klog.V(3).Infof("Evicted pod: %#v", pod.Name)
// update remaining pods
nodeUsage[v1.ResourcePods] -= onePodPercentage
*totalPods--
// update remaining cpu
*totalCPU -= float64(cUsage)
nodeUsage[v1.ResourceCPU] -= api.Percentage((float64(cUsage) * 100) / float64(nodeCapacity.Cpu().MilliValue()))
// update remaining memory
*totalMem -= float64(mUsage)
nodeUsage[v1.ResourceMemory] -= api.Percentage(float64(mUsage) / float64(nodeCapacity.Memory().Value()) * 100)
klog.V(3).Infof("updated node usage: %#v", nodeUsage)
// check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved
if !isNodeAboveTargetUtilization(nodeUsage, targetThresholds) || *totalPods <= 0 || *totalCPU <= 0 || *totalMem <= 0 {
break
}
}
}
}
}
// sortNodesByUsage sorts nodes based on usage in descending order
func sortNodesByUsage(nodes []NodeUsageMap) {
sort.Slice(nodes, func(i, j int) bool {
var ti, tj api.Percentage
for name, value := range nodes[i].usage {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
ti += value
}
}
for name, value := range nodes[j].usage {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
tj += value
}
}
// To return sorted in descending order
return ti > tj
})
}
// createNodePodsMap returns nodepodsmap with evictable pods on node.
func createNodePodsMap(ctx context.Context, client clientset.Interface, nodes []*v1.Node) NodePodsMap {
npm := NodePodsMap{}
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(ctx, client, node, nil)
if err != nil {
klog.Warningf("node %s will not be processed, error in accessing its pods (%#v)", node.Name, err)
} else {
npm[node] = pods
}
}
return npm
}
// isNodeAboveTargetUtilization checks if a node is overutilized
func isNodeAboveTargetUtilization(nodeThresholds api.ResourceThresholds, thresholds api.ResourceThresholds) bool {
for name, nodeValue := range nodeThresholds {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
if value, ok := thresholds[name]; !ok {
continue
} else if nodeValue > value {
return true
}
}
}
return false
}
// isNodeWithLowUtilization checks if a node is underutilized
func isNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds api.ResourceThresholds) bool {
for name, nodeValue := range nodeThresholds {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
if value, ok := thresholds[name]; !ok {
continue
} else if nodeValue > value {
return false
}
}
}
return true
}
func nodeUtilization(node *v1.Node, pods []*v1.Pod) api.ResourceThresholds {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: {},
v1.ResourceMemory: {},
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for name, quantity := range req {
if name == v1.ResourceCPU || name == v1.ResourceMemory {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
totalPods := len(pods)
return api.ResourceThresholds{
v1.ResourceCPU: api.Percentage((float64(totalReqs[v1.ResourceCPU].MilliValue()) * 100) / float64(nodeCapacity.Cpu().MilliValue())),
v1.ResourceMemory: api.Percentage(float64(totalReqs[v1.ResourceMemory].Value()) / float64(nodeCapacity.Memory().Value()) * 100),
v1.ResourcePods: api.Percentage((float64(totalPods) * 100) / float64(nodeCapacity.Pods().Value())),
}
}
func classifyPods(pods []*v1.Pod, evictor *evictions.PodEvictor) ([]*v1.Pod, []*v1.Pod) {
var nonRemovablePods, removablePods []*v1.Pod
for _, pod := range pods {
if !evictor.IsEvictable(pod) {
nonRemovablePods = append(nonRemovablePods, pod)
} else {
removablePods = append(removablePods, pod)
}
}
return nonRemovablePods, removablePods
}

Some files were not shown because too many files have changed in this diff Show More