* fix plugin arg conversion when using multiple profiles with same plugin
Signed-off-by: Amir Alavi <amiralavi7@gmail.com>
* PR feedback to refactor validateDeschedulerConfiguration error handling
* update helm chart to v0.27.0
* update manifest version and docs
* fix 1.27 release version in README.md
Co-authored-by: Mike Dame <mikedame@google.com>
The Evict extension point is not currently in use.
All DefaultEvictor plugin functionality is exposed through Filter and
PreEvictionFilter extension points instead.
Thus, there is no need to limit the number of evictors enabled.
* bump to k8s 1.27
Signed-off-by: Amir Alavi <amiralavi7@gmail.com>
* bump go version to 1.20.3
* bump k8s version and kine for e2e
- Populate extension points automatically from plugin types
- Make a list of enabled extension points based on a profile
configuration
- Populate filter and pre-eviction filter handles from their
corresponding extension points
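As a rough sketch of the resulting shape, a v1alpha2 profile wires plugins into extension points roughly like this (the profile name, plugin choice, and args below are illustrative, not taken from the commits above):
```yaml
apiVersion: "descheduler/v1alpha2"
kind: "DeschedulerPolicy"
profiles:
  - name: default              # illustrative profile name
    pluginConfig:
      - name: "DefaultEvictor"
      - name: "PodLifeTime"
        args:
          maxPodLifeTimeSeconds: 86400
    plugins:
      deschedule:
        enabled:
          - "PodLifeTime"      # enabling a plugin under its extension point
```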
* Descheduling profile
* Fake plugin + profile unit testing
* Rename the Profile config type to DeschedulerProfile
to avoid resemblance with profileImpl
* First run deschedule, then balance extension points
* Adding descheduler policy API Version option
in helm templates
* Updating comment for deschedulerPolicyAPIVersion
field
* Making v1alpha1 the default api version
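A sketch of the corresponding Helm value (the key name is taken from the commits above; the exact quoting is an assumption):
```yaml
# values.yaml (sketch): which DeschedulerPolicy API version the chart renders
deschedulerPolicyAPIVersion: "descheduler/v1alpha1"
```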
* v1alpha2 docs
* remove internal toc (gh has this natively)
* fix typo and newlines
* name plugins with less confusing names
* add type column
* fix kv selector and nodeSelector desc
* group plugin types in a table
* link the deprecated doc
* warning signs
* add v1alpha2 registry based conversion
* test defaults, set our 1st explicit default
* fix typos and dates
* move pluginregistry to its own dir
* remove unused v1alpha2.Namespace type
* move migration code folders, remove switch
* validate internalPolicy a single time
* remove structured logs
* simplify return
* check for nil methods
* properly check before adding default evictor
* add TODO comment
* bump copyright year
* use plugin registry and prepare for conversion
* Register plugins explicitly to a registry
* check interface impl instead of struct var
* setup plugins at top level
* treat plugin type combinations
* pass registry as arg of V1alpha1ToInternal
* move registry yet another level up
* check interface type separately
* Remove log level from Errors
Every error printed via Errors is expected to be important and always
printable.
* Invoke first Deschedule and then Balance extension points (breaking change)
* Separate plugin arg conversion from pluginsMap
* Separate profile population from plugin execution
* Convert strategy params into profiles outside the main descheduling loop
Strategy params are static and do not change in time.
* Bump the internal DeschedulerPolicy to v1alpha2
Drop conversion from v1alpha1 to internal
* add tests to v1alpha1 to internal conversion
* add tests to strategyParamsToPluginArgs params wiring
* in v1alpha1 evictableNamespaces are still Namespaces
* add test passing in all params
Co-authored-by: Lucas Severo Alves <lseveroa@redhat.com>
--help is now a command, which means explicitly providing a command override in Kubernetes is no longer required. You can now simply provide the necessary arguments.
This commit changes the build_info metric labels:
- The AppVersion label will show the major+minor version, for example 0.24.1
- minor version numbers and commit hash
Signed-off-by: eminaktas <eminaktas34@gmail.com>
This does the following:
1. Enables RemovePodsHavingTooManyRestarts by default when using Helm (it is currently not enabled by default)
2. Adds RemovePodsHavingTooManyRestarts to the values.yaml for clearer configs
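A sketch of what such a values.yaml entry could look like (the threshold and init-container settings are illustrative, not necessarily the chart's defaults):
```yaml
deschedulerPolicy:
  strategies:
    RemovePodsHavingTooManyRestarts:
      enabled: true
      params:
        podsHavingTooManyRestarts:
          podRestartThreshold: 100
          includingInitContainers: true
```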
update go to 1.19 and helm kubernetes cluster to 1.25
bump -rc.0 to 1.25 GA
bump k8s utils library
bump golang-ci
use go 1.19 for helm github action
upgrade kubectl from 0.20 to 0.25
Signed-off-by: Amir Alavi <amiralavi7@gmail.com>
When an error is returned, a strategy either stops completely or starts
processing another node. Given the error can be transient, or only one
of the limits may be exceeded, it is fairer to just skip the
pod that failed eviction and proceed to the next one instead.
In order to optimize the processing and stop earlier, it is more
practical to implement a check which reports when a limit has been
exceeded.
The method uses the node object only to get the node name, which
can be retrieved from the pod object instead.
Some strategies might try to evict a pod in Pending state which
does not have the .spec.nodeName field set. In that case, the test
for the node limit is skipped.
Both LowNode and HighNode utilization strategies evict only as many pods
as there are free resources on other nodes. Thus, the resource fit test
is always true by definition.
Add taint exclusion to RemovePodsViolatingNodeTaints. This permits node
taints to be ignored by allowing users to specify ignored taint keys or
ignored taint key=value pairs.
Currently, when the descheduler is running with --dry-run enabled, no strategy actually
evicts a pod, so every strategy always starts with a complete list of
pods. E.g. when the PodLifeTime strategy evicts a few pods, the RemoveDuplicatePods
strategy still takes into account even the pods eliminated by the PodLifeTime
strategy. This does not correspond to real-world scenarios, as the
same pod can end up evicted multiple times. Instead, use a fake client and
evict/delete the pods from its cache so the strategies evict each pod
at most once, as would normally happen in a real cluster.
This patch adds a policy option (evictFailedBarePods) to allow failed
pods without ownerReferences to be evicted. For backward compatibility,
the option is disabled by default. Addresses #644.
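A hypothetical policy opting in to the new behavior could look like this (same format as the policy examples later in this document):
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
evictFailedBarePods: true   # opt in; disabled by default for backward compatibility
strategies:
  ...
```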
calcContainerRestarts sums over containers. The new language makes
that clear, avoiding potential confusion vs. an alternative that looked
for pods where a single container had passed the configured threshold.
For example, with three containers with 50 restarts and a threshold of
100, the actual "sum over containers" logic makes that pod a candidate
for descheduling, but the "largest single container restart count"
hypothetical would not have made it a candidate.
Also shifts labelSelector into the parameter table, because when it
was added in 29ade13ce7 (README and e2e-testcase add for
labelSelector, 2021-03-02, #510), it landed a few lines too high.
description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that.
| Parameter | Description | Default |
|-----------|-------------|---------|
| `cronJobApiVersion` | CronJob API Group Version | `"batch/v1"` |
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
| `startingDeadlineSeconds` | If set, configure `startingDeadlineSeconds` for the _descheduler_ job | `nil` |
| `successfulJobsHistoryLimit` | If set, configure `successfulJobsHistoryLimit` for the _descheduler_ job | `nil` |
| `failedJobsHistoryLimit` | If set, configure `failedJobsHistoryLimit` for the _descheduler_ job | `nil` |
| `deschedulingInterval` | If using kind:Deployment, sets time between consecutive descheduler executions. | `5m` |
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
| `rbac.create`| If `true`, create & use RBAC resources | `true` |
| `podSecurityPolicy.create` | If `true`, create PodSecurityPolicy | `true` |
| `resources` | Descheduler container CPU and memory requests/limits | _see values.yaml_ |
| `serviceAccount.create` | If `true`, create a service account for the cron job | `true` |
| `serviceAccount.name` | The name of the service account to use, if not set and create is true a name is generated using the fullname template | `nil` |
| `nodeSelector` | Node selectors to run the descheduler cronjob on specific nodes | `nil` |
| `tolerations` | Tolerations to run the descheduler cronjob on specific nodes | `nil` |
| Parameter | Description | Default |
|-----------|-------------|---------|
| `cronJobApiVersion` | CronJob API Group Version | `"batch/v1"` |
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
| `startingDeadlineSeconds` | If set, configure `startingDeadlineSeconds` for the _descheduler_ job | `nil` |
| `successfulJobsHistoryLimit` | If set, configure `successfulJobsHistoryLimit` for the _descheduler_ job | `3` |
| `failedJobsHistoryLimit` | If set, configure `failedJobsHistoryLimit` for the _descheduler_ job | `1` |
| `ttlSecondsAfterFinished` | If set, configure `ttlSecondsAfterFinished` for the _descheduler_ job | `nil` |
| `deschedulingInterval` | If using kind:Deployment, sets time between consecutive descheduler executions. | `5m` |
| `replicas` | The replica count for Deployment | `1` |
| `leaderElection` | The options for high availability when running replicated components | _see values.yaml_ |
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
| `rbac.create` | If `true`, create & use RBAC resources | `true` |
| `resources` | Descheduler container CPU and memory requests/limits | _see values.yaml_ |
| `serviceAccount.create` | If `true`, create a service account for the cronjob | `true` |
| `serviceAccount.name` | The name of the service account to use, if not set and create is true a name is generated using the fullname template | `nil` |
| `serviceAccount.annotations` | Specifies custom annotations for the serviceAccount | `{}` |
| `podAnnotations` | Annotations to add to the descheduler Pods | `{}` |
| `podLabels` | Labels to add to the descheduler Pods | `{}` |
| `nodeSelector` | Node selectors to run the descheduler cronjob/deployment on specific nodes | `nil` |
| `service.enabled` | If `true`, create a service for deployment | `false` |
| `serviceMonitor.enabled` | If `true`, create a ServiceMonitor for deployment | `false` |
| `serviceMonitor.namespace` | The namespace where Prometheus expects to find service monitors | `nil` |
| `serviceMonitor.interval` | The scrape interval. If not set, the Prometheus default scrape interval is used | `nil` |
| `serviceMonitor.honorLabels` | Keeps the scraped data's labels when they collide with target labels | `true` |
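For example, exposing metrics to Prometheus with the values above could be sketched as follows (the scrape interval is illustrative):
```yaml
service:
  enabled: true
serviceMonitor:
  enabled: true
  interval: 30s
  honorLabels: true
```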
WARNING: You set the replica count to 1 and the workload kind to Deployment, however leaderElection is not enabled. Consider enabling leader election for HA mode.
{{- end}}
{{- if .Values.leaderElection }}
{{- if and (hasKey .Values.cmdOptions "dry-run") (eq (get .Values.cmdOptions "dry-run") true) }}
WARNING: You enabled DryRun mode; you can't use Leader Election.
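To avoid the first warning when running as a Deployment, a values sketch along these lines could be used (only `kind`, `replicas`, and `leaderElection` appear in the parameter table above; the nested `enabled` key is an assumption about the chart's leaderElection block):
```yaml
kind: Deployment
replicas: 2
leaderElection:
  enabled: true    # assumed field; turns on leader election for the replicas
# note: leader election cannot be combined with cmdOptions dry-run, per the warning above
```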
fs.StringVar(&rs.Logging.Format, "logging-format", "text", `Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log-dir, --log-file, --log-file-max-size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning.`)
fs.StringVar(&rs.Logging.Format, "logging-format", "text", `Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log_dir, --log_file, --log_file_max_size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning.`)
fs.DurationVar(&rs.DeschedulingInterval, "descheduling-interval", rs.DeschedulingInterval, "Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.")
fs.StringVar(&rs.KubeconfigFile, "kubeconfig", rs.KubeconfigFile, "File with kube configuration.")
fs.StringVar(&rs.ClientConnection.Kubeconfig, "kubeconfig", rs.ClientConnection.Kubeconfig, "File with kube configuration. Deprecated, use client-connection-kubeconfig instead.")
fs.StringVar(&rs.ClientConnection.Kubeconfig, "client-connection-kubeconfig", rs.ClientConnection.Kubeconfig, "File path to kube configuration for interacting with kubernetes apiserver.")
fs.Float32Var(&rs.ClientConnection.QPS, "client-connection-qps", rs.ClientConnection.QPS, "QPS to use for interacting with kubernetes apiserver.")
fs.Int32Var(&rs.ClientConnection.Burst, "client-connection-burst", rs.ClientConnection.Burst, "Burst to use for interacting with kubernetes apiserver.")
fs.StringVar(&rs.PolicyConfigFile, "policy-config-file", rs.PolicyConfigFile, "File with descheduler policy configuration.")
fs.BoolVar(&rs.DryRun, "dry-run", rs.DryRun, "execute descheduler in dry run mode.")
// node-selector query causes descheduler to run only on nodes that match the node labels in the query
fs.StringVar(&rs.NodeSelector, "node-selector", rs.NodeSelector, "DEPRECATED: selector (label query) to filter on, supports '=', '==', and '!='. (e.g. -l key1=value1,key2=value2)")
// max-pods-to-evict-per-node limits the maximum number of pods to be evicted per node by descheduler.
fs.IntVar(&rs.MaxNoOfPodsToEvictPerNode, "max-pods-to-evict-per-node", rs.MaxNoOfPodsToEvictPerNode, "DEPRECATED: limits the maximum number of pods to be evicted per node by descheduler")
// evict-local-storage-pods allows eviction of pods that are using local storage. This is false by default.
fs.BoolVar(&rs.EvictLocalStoragePods, "evict-local-storage-pods", rs.EvictLocalStoragePods, "DEPRECATED: enables evicting pods using local storage by descheduler")
fs.BoolVar(&rs.DryRun, "dry-run", rs.DryRun, "Execute descheduler in dry run mode.")
fs.BoolVar(&rs.DisableMetrics, "disable-metrics", rs.DisableMetrics, "Disables metrics. The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.")
The descheduler evicts pods which may be bound to less desired nodes
```
descheduler [flags]
```
### Options
```
--bind-address ip The IP address on which to listen for the --secure-port port. The associated interface(s) must be reachable by the rest of the cluster, and by CLI/web clients. If blank or an unspecified address (0.0.0.0 or ::), all interfaces will be used. (default 0.0.0.0)
--cert-dir string The directory where the TLS certs are located. If --tls-cert-file and --tls-private-key-file are provided, this flag will be ignored. (default "apiserver.local.config/certificates")
--client-connection-burst int32 Burst to use for interacting with kubernetes apiserver.
--client-connection-kubeconfig string File path to kube configuration for interacting with kubernetes apiserver.
--client-connection-qps float32 QPS to use for interacting with kubernetes apiserver.
--descheduling-interval duration Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.
--disable-metrics Disables metrics. The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.
--dry-run Execute descheduler in dry run mode.
-h, --help help for descheduler
--http2-max-streams-per-connection int The limit that the server gives to clients for the maximum number of streams in an HTTP/2 connection. Zero means to use golang's default.
--kubeconfig string File with kube configuration. Deprecated, use client-connection-kubeconfig instead.
--leader-elect Start a leader election client and gain leadership before executing the main loop. Enable this when running replicated components for high availability.
--leader-elect-lease-duration duration The duration that non-leader candidates will wait after observing a leadership renewal until attempting to acquire leadership of a led but unrenewed leader slot. This is effectively the maximum duration that a leader can be stopped before it is replaced by another candidate. This is only applicable if leader election is enabled. (default 2m17s)
--leader-elect-renew-deadline duration The interval between attempts by the acting master to renew a leadership slot before it stops leading. This must be less than the lease duration. This is only applicable if leader election is enabled. (default 1m47s)
--leader-elect-resource-lock string The type of resource object that is used for locking during leader election. Supported options are 'leases', 'endpointsleases' and 'configmapsleases'. (default "leases")
--leader-elect-resource-name string The name of resource object that is used for locking during leader election. (default "descheduler")
--leader-elect-resource-namespace string The namespace of resource object that is used for locking during leader election. (default "kube-system")
--leader-elect-retry-period duration The duration the clients should wait between attempting acquisition and renewal of a leadership. This is only applicable if leader election is enabled. (default 26s)
--logging-format string Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log_dir, --log_file, --log_file_max_size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning. (default "text")
--permit-address-sharing If true, SO_REUSEADDR will be used when binding the port. This allows binding to wildcard IPs like 0.0.0.0 and specific IPs in parallel, and it avoids waiting for the kernel to release sockets in TIME_WAIT state. [default=false]
--permit-port-sharing If true, SO_REUSEPORT will be used when binding the port, which allows more than one instance to bind on the same address and port. [default=false]
--policy-config-file string File with descheduler policy configuration.
--secure-port int The port on which to serve HTTPS with authentication and authorization. If 0, don't serve HTTPS at all. (default 10258)
--tls-cert-file string File containing the default x509 Certificate for HTTPS. (CA cert, if any, concatenated after server cert). If HTTPS serving is enabled, and --tls-cert-file and --tls-private-key-file are not provided, a self-signed certificate and key are generated for the public address and saved to the directory specified by --cert-dir.
--tls-cipher-suites strings Comma-separated list of cipher suites for the server. If omitted, the default Go cipher suites will be used.
--tls-sni-cert-key namedCertKey A pair of x509 certificate and private key file paths, optionally suffixed with a list of domain patterns which are fully qualified domain names, possibly with prefixed wildcard segments. The domain patterns also allow IP addresses, but IPs should only be used if the apiserver has visibility to the IP address requested by a client. If no domain patterns are provided, the names of the certificate are extracted. Non-wildcard matches trump over wildcard matches, explicit domain patterns trump over extracted names. For multiple key/certificate pairs, use the --tls-sni-cert-key multiple times. Examples: "example.crt,example.key" or "foo.crt,foo.key:*.foo.com,foo.com". (default [])
```
### SEE ALSO
* [descheduler version](descheduler_version.md) - Version of descheduler
Run the helm test for a particular descheduler release by setting the variables below:
```
HELM_IMAGE_REPO="descheduler"
HELM_IMAGE_TAG="helm-test"
HELM_CHART_LOCATION="./charts/descheduler"
```
The helm tests run as part of the descheduler CI. To run them manually from the descheduler root:
```
make test-helm
```
## Format Code
After making changes in the code base, ensure that the code is formatted correctly:
```
make fmt
```
## Build Helm Package locally
If you made some changes in the chart, and just want to check if templating is ok, or if the chart is buildable, you can run this command to have a package built from the `./charts` directory.
```
make build-helm
```
## Lint Helm Chart locally
To check linting of your changes in the helm chart locally you can run:
```
make lint-chart
```
## Test helm changes locally with kind and ct
You will need kind and docker (or equivalent) installed. We can use the public ct image to avoid installing ct and all of its dependencies.
Starting with release v0.18.0 there is an official helm chart that can be used to install the
descheduler. See the [helm chart README](https://github.com/kubernetes-sigs/descheduler/blob/master/charts/descheduler/README.md) for detailed instructions.
The descheduler helm chart is also listed on the [artifact hub](https://artifacthub.io/packages/helm/descheduler/descheduler).
### Install Using Kustomize
You can use kustomize to install descheduler.
See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/kustomize/cmd/build/) for detailed instructions.
See the [user guide](docs/user-guide.md) in the `/docs` directory.
## Policy and Strategies
Descheduler's policy is configurable and includes strategies that can be enabled or disabled. By default, all strategies are enabled.
The policy includes a common configuration that applies to all the strategies:
| Name | Default Value | Description |
|------|---------------|-------------|
| `nodeSelector` | `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` | `false` | allows eviction of pods with local storage |
| `evictSystemCriticalPods` | `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
| `ignorePvcPods` | `false` | set whether PVC pods should be evicted or ignored |
| `maxNoOfPodsToEvictPerNode` | `nil` | maximum number of pods evicted from each node (summed through all strategies) |
| `maxNoOfPodsToEvictPerNamespace` | `nil` | maximum number of pods evicted from each namespace (summed through all strategies) |
| `evictFailedBarePods` | `false` | allow eviction of pods without owner references and in failed phase |
As part of the policy, the parameters associated with each strategy can be configured.
See each strategy for details on available parameters.
**Policy:**
```yaml
apiVersion:"descheduler/v1alpha1"
kind:"DeschedulerPolicy"
nodeSelector:prod=dev
evictFailedBarePods:false
evictLocalStoragePods:true
evictSystemCriticalPods:true
maxNoOfPodsToEvictPerNode:40
ignorePvcPods:false
strategies:
...
```
The following diagram provides a visualization of most of the strategies to help
categorize how strategies fit together.

### RemoveDuplicates
This strategy makes sure that there is only one pod associated with a ReplicaSet (RS),
ReplicationController (RC), StatefulSet, or Job running on the same node. If there are more,
those duplicate pods are evicted for better spreading of pods in a cluster. This issue could happen
if some nodes went down for whatever reason, and pods on them were moved to other nodes, leading to
more than one pod associated with an RS or RC, for example, running on the same node. Once the failed nodes
are ready again, this strategy could be enabled to evict those duplicate pods.
It provides one optional parameter, `excludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction. Note that
pods created by Deployments are considered for eviction by this strategy. The `excludeOwnerKinds` parameter
should include `ReplicaSet` to have pods created by Deployments excluded.
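For instance, a policy along these lines (v1alpha1 format, as elsewhere in this document) excludes pods whose owner is a ReplicaSet, and therefore pods created by Deployments:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemoveDuplicates":
    enabled: true
    params:
      removeDuplicates:
        excludeOwnerKinds:
          - "ReplicaSet"
```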
Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`.
If any of these resource types is not specified, all its thresholds default to 100% to avoid nodes going from underutilized to overutilized.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional,
and will not be used to compute a node's usage unless they are specified in both `thresholds` and `targetThresholds` explicitly.
* `thresholds` or `targetThresholds` can not be nil and they must configure exactly the same types of resources.
* The valid range of the resource's percentage value is \[0, 100\]
* Percentage value of `thresholds` can not be greater than `targetThresholds` for the same resource.
There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
is above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
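Putting the checks above together, a `LowNodeUtilization` configuration sketch might look like the following (the percentage values are illustrative; `numberOfNodes` is omitted, so it defaults to zero):
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "LowNodeUtilization":
    enabled: true
    params:
      nodeResourceUtilizationThresholds:
        thresholds:           # nodes with usage below all of these are considered underutilized
          "cpu": 20
          "memory": 20
          "pods": 20
        targetThresholds:     # nodes with usage above any of these are considered overutilized
          "cpu": 50
          "memory": 50
          "pods": 50
```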
### HighNodeUtilization
This strategy finds nodes that are under utilized and evicts pods from the nodes in the hope that these pods will be
scheduled compactly into fewer nodes. Used in conjunction with node auto-scaling, this strategy is intended to help
trigger down scaling of under utilized nodes.
This strategy **must** be used with the scheduler scoring strategy `MostAllocated`. The parameters of this strategy are
configured under `nodeResourceUtilizationThresholds`.
The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage. The percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node.
If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
Currently, pods' resource requests are considered for computing node resource utilization.
Any node above `thresholds` is considered appropriately utilized and is not considered for eviction.
The `thresholds` param could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `underutilized nodes` (those with usage below `thresholds`)
so that they can be recreated in appropriately utilized nodes.
The strategy will abort if the number of `underutilized nodes` or `appropriately utilized nodes` is zero.
**NOTE:** Node resource consumption is determined by the requests and limits of pods, not actual usage.
This approach is chosen in order to maintain consistency with the kube-scheduler, which follows the same
design for scheduling pods onto nodes. This means that resource usage as reported by Kubelet (or commands
like `kubectl top`) may differ from the calculated consumption, due to these components reporting
actual usage metrics. Implementing metrics-based descheduling is currently TODO for the project.
**Parameters:**
|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`numberOfNodes`|int|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`. If any of these resource types is not specified, all its thresholds default to 100%.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional, and will not be used to compute a node's usage unless they are specified in `thresholds` explicitly.
* `thresholds` can not be nil.
* The valid range of the resource's percentage value is \[0, 100\]
There is another parameter associated with the `HighNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
is above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
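Analogously, a `HighNodeUtilization` sketch only needs `thresholds` (the percentage values are illustrative):
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "HighNodeUtilization":
    enabled: true
    params:
      nodeResourceUtilizationThresholds:
        thresholds:   # nodes with usage below all of these are candidates for draining
          "cpu": 20
          "memory": 20
          "pods": 20
```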
### RemovePodsViolatingInterPodAntiAffinity
This strategy makes sure that pods violating interpod anti-affinity are removed from nodes. For example,
if there is podA on a node, and podB and podC (running on the same node) have anti-affinity rules which prohibit
them from running on the same node, then podA will be evicted from the node so that podB and podC can run. This
issue could happen when the anti-affinity rules for podB and podC are created while they are already running on
the node.
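A minimal sketch that enables the strategy (it needs no strategy-specific parameters beyond the common filters listed below):
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemovePodsViolatingInterPodAntiAffinity":
    enabled: true
```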
**Parameters:**
|Name|Type|
|---|---|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
````yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingNodeTaints":
enabled: true
params:
excludedTaints:
- dedicated=special-user # exclude taints with key "dedicated" and value "special-user"
- reserved # exclude all taints with key "reserved"
````
### RemovePodsViolatingTopologySpreadConstraint
This strategy makes sure that pods violating [topology spread constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/)
are evicted from nodes. Specifically, it tries to evict the minimum number of pods required to balance topology domains to within each constraint's `maxSkew`.
This strategy requires k8s version 1.18 at a minimum.
By default, this strategy only deals with hard constraints; setting the parameter `includeSoftConstraints` to `true` will
include soft constraints as well.
Strategy parameter `labelSelector` is not utilized when balancing topology domains and is only applied during eviction to determine if the pod can be evicted.
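For example, a sketch that also opts in to soft constraints (v1alpha1 format, as used elsewhere in this document):
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
  "RemovePodsViolatingTopologySpreadConstraint":
    enabled: true
    params:
      includeSoftConstraints: true   # also consider ScheduleAnyway constraints
```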
**Parameters:**
|Name|Type|
|---|---|
|`includeSoftConstraints`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|