1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Compare commits

...

439 Commits

Author SHA1 Message Date
Kubernetes Prow Robot
6cdfab1aa9 Merge pull request #719 from JaneLiuL/release-1.23
[release-1.23] update docker image version to v0.23 and fix watch namespace issue
2022-02-09 05:34:18 -08:00
Jane Liu L
2e343a5202 update docker image version to v0.23 and fix watch namespace issue 2022-02-09 21:16:49 +08:00
Kubernetes Prow Robot
547dbcec65 Merge pull request #710 from damemi/update-helm-23
[release-1.23] Update helm chart version to v0.23
2022-02-03 12:22:58 -08:00
Mike Dame
5081ad84b5 Update helm chart version to v0.23 2022-02-03 14:57:18 -05:00
Kubernetes Prow Robot
afb1d75ce1 Merge pull request #660 from martin-magakian/features/add_affinity_option
Adding 'affinity' support to run 'descheduler' in CronJob or Deployment
2022-01-27 05:56:27 -08:00
Kubernetes Prow Robot
8e3ef9a6b3 Merge pull request #694 from sharkannon/master
Updates to include annotations to the service account
2022-01-27 05:42:26 -08:00
Kubernetes Prow Robot
778a18c550 Merge pull request #700 from jklaw90/root-ctx
Use the root context cancellation
2022-01-27 05:08:25 -08:00
Julian Lawrence
1a98a566b3 adding cancelation from sigint sigterm 2022-01-25 00:10:09 -08:00
Kubernetes Prow Robot
a643c619c9 Merge pull request #699 from ingvagabund/evict-pods-report-metrics-indendent-of-the-dry-mode
Evictor: report successful eviction independently of the dry-mode
2022-01-20 14:16:29 -08:00
Jan Chaloupka
203388ff1a Evictor: report successful eviction independently of the dry-mode
Dry mode currently does not report metrics when the eviction succeeds
2022-01-20 21:23:19 +01:00
Kubernetes Prow Robot
2844f80a35 Merge pull request #677 from ingvagabund/accumulated-eviction
Use a fake client when evicting pods by individual strategies to accumulate the evictions
2022-01-20 08:15:52 -08:00
Jan Chaloupka
901a16ecbc Do not collect the metrics when the metrics server is not enabled 2022-01-20 17:04:15 +01:00
Jan Chaloupka
e0f086ff85 Use a fake client when evicting pods by individual strategies to accumulate the evictions
Currently, when the descheduler is running with the --dry-run on, no strategy actually
evicts a pod so every strategy always starts with a complete list of
pods. E.g. when the PodLifeTime strategy evicts few pods, the RemoveDuplicatePods
strategy still takes into account even the pods eliminated by the PodLifeTime
strategy. Which does not correspond to the real case scenarios as the
same pod can be evicted multiple times. Instead, use a fake client and
evict/delete the pods from its cache so the strategies evict each pod
at most once as it would be normally done in a real cluster.
2022-01-20 17:04:05 +01:00
Stephen Herd
8752a28025 Merge branch 'kubernetes-sigs-master' 2022-01-13 12:52:36 -08:00
Stephen Herd
24884c7568 Rebase from master 2022-01-13 12:52:06 -08:00
Kubernetes Prow Robot
175f648045 Merge pull request #695 from a7i/liveness-template
make livenessprobe consistent across manifests
2022-01-12 13:37:40 -08:00
Amir Alavi
f50a3fa119 make livenessprobe consistent across manifests; make helm chart configurable via values.yaml 2022-01-12 11:49:17 -05:00
Kubernetes Prow Robot
551eced42a Merge pull request #688 from babygoat/evict-failed-without-ownerrefs
feat: support eviction of failed bare pods
2022-01-11 12:31:15 -08:00
Stephen Herd
3635a8171c Updates to include annotations to the service account, needed for things such as Workload Identity in Google Cloud 2022-01-11 11:55:05 -08:00
Kubernetes Prow Robot
796f347305 Merge pull request #692 from jklaw90/sliding-until
NonSlidingUntil for deployment
2022-01-11 06:21:16 -08:00
Kubernetes Prow Robot
13abbe7f09 Merge pull request #693 from developer-guy/patch-1
Update NOTES.txt
2022-01-10 05:11:13 -08:00
Kubernetes Prow Robot
e4df54d2d1 Merge pull request #685 from JaneLiuL/master
add  liveness probe
2022-01-10 04:29:12 -08:00
Jane Liu L
c38f617e40 add liveness probe 2022-01-10 09:56:53 +08:00
Kubernetes Prow Robot
e6551564c4 Merge pull request #691 from RyanDevlin/waitForNodes
Eliminated race condition in E2E tests
2022-01-07 06:16:30 -08:00
Batuhan Apaydın
3a991dd50c Update NOTES.txt
Signed-off-by: Batuhan Apaydın <batuhan.apaydin@trendyol.com>
Co-authored-by: Furkan Türkal <furkan.turkal@trendyol.com>
Co-authored-by: Emin Aktaş <emin.aktas@trendyol.com>
Co-authored-by: Necatican Yıldırım <necatican.yildirim@trendyol.com>
Co-authored-by: Fatih Sarhan <fatih.sarhan@trendyol.com>
2022-01-07 13:42:00 +03:00
Julian Lawrence
77cb406052 updated until -> sliding until 2022-01-06 12:55:10 -08:00
RyanDevlin
921a5680ab Eliminated race condition in E2E tests 2022-01-06 09:36:13 -05:00
babygoat
1529180d70 feat: support eviction of failed bare pods
This patch adds the policy(evictFailedBarePods) to allow the failed
pods without ownerReferences to be evicted. For backward compatibility,
disable the policy by default. Address #644.
2022-01-06 01:07:41 +08:00
Kubernetes Prow Robot
2d9143d129 Merge pull request #687 from jklaw90/error-comment
Comment update for metrics
2022-01-04 06:52:52 -08:00
Kubernetes Prow Robot
e9c0833b6f Merge pull request #689 from ingvagabund/run-hack-update-generated-conversions-sh
run ./hack/update-* scripts
2022-01-04 06:34:52 -08:00
Jan Chaloupka
8462cf56d7 run ./hack/update-* scripts 2022-01-04 09:37:01 +01:00
Julian Lawrence
a60d6a527d updated comment to reflect actual value 2021-12-29 10:56:10 -08:00
Kubernetes Prow Robot
2b23694704 Merge pull request #682 from jklaw90/chart-labels
commonLabels value for chart
2021-12-26 06:15:15 -08:00
Julian Lawrence
d0a95bee2f fixed default value for common labels 2021-12-20 08:24:30 -08:00
Julian Lawrence
57a910f5d1 adding commonLabels value 2021-12-18 23:31:52 -08:00
Kubernetes Prow Robot
ccaedde183 Merge pull request #661 from kirecek/enhc/include-pod-reason
Add pod.Status.Reason to the list of reasons
2021-12-17 13:11:55 -08:00
Erik Jankovič
2020642b6f chore: add pod.Status.Reason to the list of reasons
Signed-off-by: Erik Jankovič <erik.jankovic@gmail.com>
2021-12-17 18:37:53 +01:00
Kubernetes Prow Robot
96ff5d2dd9 Merge pull request #680 from ingvagabund/klog-output-stdout
Set the klog output to stdout by default
2021-12-16 05:31:18 -08:00
Jan Chaloupka
d8718d7db3 Set the klog output to stdout by default
Also, one needs to set --logtostderr=false to properly log into the stdout
2021-12-16 11:22:40 +01:00
Kubernetes Prow Robot
1e5165ba9f Merge pull request #670 from autumn0207/improve_pod_eviction_metrics
Add node name label to the counter metric for evicted pods
2021-12-16 01:49:18 -08:00
autumn0207
8e74f8bd77 improve pod eviction metrics 2021-12-16 17:06:22 +08:00
Kubernetes Prow Robot
2424928019 Merge pull request #667 from damemi/1.23-rc.0
bump: k8s to 1.23
2021-12-15 06:56:20 -08:00
Jan Chaloupka
e6314d2c7e Init the klog directly
Since 3948cb8d1b (diff-465167b08358906be13f9641d4798c6e8ad0790395e045af8ace4d08223fa922R78)
the klog verbosity level gets always overriden.
2021-12-15 09:23:20 -05:00
Kubernetes Prow Robot
271ee3c7e3 Merge pull request #678 from a7i/golangci-fix
fix: install golangci using from the golangci repo
2021-12-15 02:20:19 -08:00
Amir Alavi
e58686c142 fix: install golangci using from the golangci repo 2021-12-14 13:18:19 -05:00
Kubernetes Prow Robot
0b2c10d6ce Merge pull request #673 from Garrybest/pr_pod_cache
list pods assigned to a node by pod informer cache
2021-12-14 01:32:04 -08:00
Garrybest
cac3b9185b reform all test files
Signed-off-by: Garrybest <garrybest@foxmail.com>
2021-12-11 19:43:16 +08:00
Mike Dame
94888e653c Move klog initialization to cli.Run() 2021-12-10 12:00:11 -05:00
Mike Dame
936578b238 Update k8s version in helm test 2021-12-10 10:14:47 -05:00
Mike Dame
4fa7bf978c run hack/update-generated-deep-copies.sh 2021-12-10 10:02:39 -05:00
Mike Dame
2f7c496944 React to 1.23 bump
Logging validation functions changed in upstream commit
54ecfcdac8.
This uses the new function name.
2021-12-10 10:02:26 -05:00
Mike Dame
5fe3ca86ff bump: k8s to 1.23 2021-12-10 10:02:14 -05:00
Garrybest
0ff8ecb41e reform all strategies by using getPodsAssignedToNode
Signed-off-by: Garrybest <garrybest@foxmail.com>
2021-12-10 19:28:51 +08:00
Garrybest
08ed129a07 reform ListPodsOnANode by using pod informer and indexer
Signed-off-by: Garrybest <garrybest@foxmail.com>
2021-12-10 19:25:20 +08:00
Kubernetes Prow Robot
49ad197dfc Merge pull request #658 from JaneLiuL/master
Add maxNoOfPodsToEvictPerNamespace policy
2021-12-03 01:50:27 -08:00
Kubernetes Prow Robot
82201d0e48 Add maxNoOfPodsToEvictPerNamespace policy 2021-12-03 10:58:37 +08:00
Kubernetes Prow Robot
2b95332e8c Merge pull request #665 from spiffxp/use-k8s-infra-for-gcb-image
images: use k8s-staging-test-infra/gcb-docker-gcloud
2021-11-30 13:59:01 -08:00
Aaron Crickenberger
e8ed62e540 images: use k8s-staging-test-infra/gcb-docker-gcloud 2021-11-30 13:12:18 -08:00
Kubernetes Prow Robot
e5725de7bb Merge pull request #664 from stpabhi/dev
fix typo minPodLifeTimeSeconds
2021-11-30 08:10:56 -08:00
Abhilash Pallerlamudi
c47e811937 fix typo minPodLifeTimeSeconds
Signed-off-by: Abhilash Pallerlamudi <stp.abhi@gmail.com>
2021-11-29 17:51:40 -08:00
Kubernetes Prow Robot
e0bac4c371 Merge pull request #662 from ingvagabund/drop-deprecated-flags
Drop deprecated flags
2021-11-29 08:43:23 -08:00
Jan Chaloupka
73a7adf572 Drop deprecated flags 2021-11-29 17:12:59 +01:00
Kubernetes Prow Robot
5cf381a817 Merge pull request #663 from ingvagabund/bump-go-to-1.17
Bump go version in go.mod to go1.17
2021-11-29 08:01:23 -08:00
Jan Chaloupka
4603182320 Bump go version in go.mod to go1.17 2021-11-29 16:49:35 +01:00
Martin Magakian
ad207775ff Adding 'affinity' support to run 'descheduler' in CronJob or Deployment 2021-11-18 11:08:36 +01:00
Kubernetes Prow Robot
d0f11a41c0 Merge pull request #639 from JaneLiuL/master
Ignore Pods With Deletion Timestamp
2021-11-15 08:44:49 -08:00
Jane Liu L
c7524705b3 Ignore Pods With Deletion Timestamp 2021-11-10 09:32:11 +08:00
Kubernetes Prow Robot
50f9513cbb Merge pull request #642 from wking/clarify-RemovePodsHavingTooManyRestarts
README: Clarify podRestartThreshold applying to the sum over containers
2021-10-13 03:15:49 -07:00
W. Trevor King
6fd80ba29c README: Clarify podRestartThreshold applying to the sum over containers
calcContainerRestarts sums over containers.  The new language makes
that clear, avoiding potential confusion vs. an altenative that looked
for pods where a single container had passed the configured threshold.
For example, with three containers with 50 restarts and a threshold of
100, the actual "sum over containers" logic makes that pod a candidate
for descheduling, but the "largest single container restart count"
hypothetical would not have made it a candidate.

Also shifts labelSelector into the parameter table, because when it
was added in 29ade13ce7 (README and e2e-testcase add for
labelSelector, 2021-03-02, #510), it landed a few lines too high.
2021-10-07 14:51:26 -07:00
Kubernetes Prow Robot
5b557941fa Merge pull request #627 from JaneLiuL/master
Add E2E test case cover duplicatepods strategy
2021-10-01 00:01:22 -07:00
Kubernetes Prow Robot
c6229934a0 Merge pull request #637 from KohlsTechnology/helm-suspend-docs
Document suspend helm chart configuration option
2021-09-30 23:47:22 -07:00
Sean Malloy
ed28eaeccc Document suspend helm chart configuration option 2021-09-30 23:30:10 -05:00
Kubernetes Prow Robot
3be910c238 Merge pull request #621 from uthark/oatamanenko/deleted
Ignore pods being deleted
2021-09-30 21:21:22 -07:00
Kubernetes Prow Robot
d96dd6da2d Merge pull request #632 from a7i/amir/failedpods-crash
RemoveFailedPods: guard against nil descheduler strategy (e.g. in case of default that loads all strategies)
2021-09-29 01:02:49 -07:00
Amir Alavi
f7c26ef41f e2e tests for RemoveFailedPods strategy
Fix priority class default
2021-09-26 20:39:32 -04:00
Jane Liu L
57ad9cc91b Add E2E test case cover tooManyRestarts strategy 2021-09-26 09:10:17 +08:00
Kubernetes Prow Robot
926339594d Merge pull request #622 from yutachaos/feature/added_suspend_parameter
Added support for cronjob suspend
2021-09-22 09:18:01 -07:00
Amir Alavi
1ba53ad68c e2e TestTopologySpreadConstraint: ensure pods are running before checking for topology spread across domains 2021-09-20 18:18:47 -04:00
Amir Alavi
6eb37ce079 RemoveFailedPods: guard against nil descheduler strategy (e.g. in case of default that loads all strategies) 2021-09-20 11:20:54 -04:00
Kubernetes Prow Robot
54d660eee0 Merge pull request #629 from chenkaiyue/fix-node-affinity-test
fix duplicate code in node_affinity_test.go
2021-09-16 01:59:46 -07:00
yutachaos
cf219fbfae Added helm chart suspend parameter
Signed-off-by: yutachaos <18604471+yutachaos@users.noreply.github.com>
2021-09-16 14:32:09 +09:00
kaiyuechen
d1d9ea0c48 fix duplicate code in node_affinity_test.go 2021-09-16 10:39:52 +08:00
Oleg Atamanenko
4448d9c670 Ignore pods being deleted 2021-09-15 00:05:51 -07:00
Kubernetes Prow Robot
3909f3acae Merge pull request #623 from damemi/release-1.22
Update Helm chart version to 0.22.0
2021-09-08 13:13:56 -07:00
Mike Dame
9f1274f3ab Update Helm chart version to 0.22.0 2021-09-08 16:00:56 -04:00
Kubernetes Prow Robot
e6926e11ea Merge pull request #617 from KohlsTechnology/docs-1.22
Update Docs and Manifests for v0.22.0
2021-08-31 09:31:37 -07:00
Sean Malloy
16228c9dd1 Update Docs and Manifests for v0.22.0
* Added v0.22 references to README
* Update k8s manifests with v0.22.0 references
* Added table with list of supported architectures by release
2021-08-31 00:24:18 -05:00
Kubernetes Prow Robot
57dabbca5c Merge pull request #597 from a7i/amir/e2e-topology
Add e2e tests for TopologySpreadConstraint
2021-08-30 22:00:28 -07:00
Kubernetes Prow Robot
04439c6e64 Merge pull request #610 from a7i/failed-pods
Introduce RemoveFailedPods strategy
2021-08-30 11:30:09 -07:00
Amir Alavi
0e0e688fe8 Introduce RemoveFailedPods strategy 2021-08-30 14:17:52 -04:00
Kubernetes Prow Robot
0603de4353 Merge pull request #613 from derdanne/derdanne/highnodeutilization-readme-591
nodeutilization strategy: "numberOfNodes" should work as documented
2021-08-24 08:29:14 -07:00
Kubernetes Prow Robot
ea911db6dc Merge pull request #615 from KohlsTechnology/bump-1.22
Bump To k8s 1.22.0
2021-08-19 07:07:24 -07:00
Sean Malloy
c079c7aaae Bump To k8s 1.22.0 2021-08-19 00:40:38 -05:00
Kubernetes Prow Robot
5420988a28 Merge pull request #614 from KohlsTechnology/bump-go-version
Bump To Go 1.16.7
2021-08-18 02:10:08 -07:00
Sean Malloy
b56a1ab80a Bump To Go 1.16.7
The main k/k repo was updated to Go 1.16.7 for k8s
v1.22.0 release. See below PR for reference.

https://github.com/kubernetes/kubernetes/pull/104200
2021-08-17 23:47:45 -05:00
Daniel Klockenkämper
a6b34c1130 nodeutilization strategy: "numberOfNodes" should work as documented 2021-08-17 13:29:07 +00:00
Amir Alavi
0de8002b7d Update gce scripts to spread nodes over 2 zones 2021-08-16 22:41:37 -04:00
Amir Alavi
84d648ff60 Add e2e tests for TopologySpreadConstraint 2021-08-16 22:39:31 -04:00
Kubernetes Prow Robot
6ad6f6fce5 Merge pull request #592 from chrisjohnson00/issue-591
docs: adding clarification to HighNodeUtilization's purpose
2021-08-12 21:38:22 -07:00
Kubernetes Prow Robot
b7100ad871 Merge pull request #582 from praxist/helm-deployment
Update helm chart for running as deployment
2021-08-12 07:29:47 -07:00
Matthew Leung
38c0f1c639 Update helm chart for running as deployment 2021-08-11 11:56:14 -07:00
Kubernetes Prow Robot
64d7901d82 Merge pull request #602 from a7i/balance-domains-aboveavg
TopologySpreadConstraint: advance above avg index when at ideal average
2021-08-10 05:47:25 -07:00
Kubernetes Prow Robot
ab1015e5fa Merge pull request #599 from a7i/amir/update-gce-images
Update GCE images to Ubuntu 18.04 LTS
2021-08-10 05:47:18 -07:00
Kubernetes Prow Robot
1753bf4422 Merge pull request #611 from a7i/docs-remove-redundant-strategies
Update README to remove redundant list of strategies
2021-08-02 08:11:22 -07:00
Amir Alavi
7cb44dca27 Update README to remove redundant list of strategies 2021-07-30 16:35:00 -04:00
Amir Alavi
2ec4b8b674 Update GCE images to Ubuntu 18.04 LTS 2021-07-30 16:14:20 -04:00
Kubernetes Prow Robot
0d0633488d Merge pull request #598 from jayonlau/clenup
Clean up extra spaces
2021-07-30 11:11:37 -07:00
Kubernetes Prow Robot
f3b3853d9d Merge pull request #562 from wsscc2021/helm-chart-tolerations
Add tolerations to cronjob in helm chart
2021-07-30 10:43:38 -07:00
Kubernetes Prow Robot
e550e5e22a Merge pull request #600 from a7i/topology-example
Add example for RemovePodsViolatingTopologySpreadConstraint
2021-07-28 05:37:34 -07:00
Kubernetes Prow Robot
2bf37ff495 Merge pull request #608 from a7i/master
Remove kind binary from repo
2021-07-28 01:13:35 -07:00
Amir Alavi
fa84ec6774 Remove kind binary from repo and add to gitignore 2021-07-27 08:28:55 -04:00
Kubernetes Prow Robot
e18e0416b1 Merge pull request #607 from a7i/fix-helm-test
Place bash shebang at the top of the script + Ensure Helm installed for run-helm-tests
2021-07-27 00:40:45 -07:00
Amir Alavi
34282162f8 Wait for job to start/finish in helm-test script 2021-07-26 23:53:36 -04:00
Amir Alavi
7a043d31be Ensure helm is installed and move shebang to top of file 2021-07-26 16:16:20 -04:00
Amir Alavi
b0e5d64bd7 TopologySpreadConstraint: advance above avg index when at ideal average when balancing domains 2021-07-13 22:55:10 -04:00
Amir Alavi
1c9ac2daee Add example for RemovePodsViolatingTopologySpreadConstraint 2021-07-09 22:56:59 -04:00
jayonlau
c6b67e8a6f Clean up extra spaces
Clean up extra spaces, although these errors are not important, they affect the code specification.
2021-07-09 15:40:35 +08:00
Kubernetes Prow Robot
2e4873d103 Merge pull request #594 from ikarldasan/master-to-main
Rename master to main
2021-07-06 05:50:18 -07:00
Karl Dasan
3e483c4d85 Rename master to main 2021-07-01 13:48:50 +05:30
Kubernetes Prow Robot
032ea70380 Merge pull request #549 from pravarag/verify-defaulters-gen
Add verify scripts for defaulters generator
2021-06-30 04:05:03 -07:00
Pravar Agrawal
df84dc4548 add verify script for defaulters gen 2021-06-30 09:41:43 +05:30
Chris Johnson
d4fa83f8bc chore: PR feedback 2021-06-25 12:30:28 -07:00
Chris Johnson
448dbceadd docs: adding clarification to HighNodeUtilization's purpose 2021-06-24 16:20:21 -07:00
Kubernetes Prow Robot
b83b064992 Merge pull request #589 from a7i/remove-kind-bin
Remove kind binary from repo
2021-06-16 00:19:59 -07:00
Amir Alavi
133a0049e3 Remove kind binary from repo 2021-06-15 18:29:06 -04:00
Kubernetes Prow Robot
50b1c1337d Merge pull request #561 from mekza/support_image_pull_secrets
Add support for private registry creds
2021-06-10 23:28:58 -07:00
Martin-Zack Mekkaoui
d5deed44ca Add support for private registry creds 2021-06-09 22:11:37 +02:00
Kubernetes Prow Robot
0f785b9530 Merge pull request #584 from damemi/update-go-badge
Update Go report card badge
2021-06-08 10:48:08 -07:00
Mike Dame
eb1f0ecc14 Update Go report card badge 2021-06-08 13:39:53 -04:00
Kubernetes Prow Robot
b59995eeb8 Merge pull request #583 from ingvagabund/highnodeutil-nodefit
HighNodeUtilization: add NodeFit feature
2021-06-08 08:15:13 -07:00
Jan Chaloupka
d998d82357 HighNodeUtilization: add NodeFit feature 2021-06-08 16:59:43 +02:00
Kubernetes Prow Robot
f51ea72ce0 Merge pull request #577 from a7i/amira/cronjob-ga
Use stable batch/v1 API Group for Kubernetes 1.21
2021-06-08 06:03:13 -07:00
Kubernetes Prow Robot
fe8d4c0d21 Merge pull request #572 from audip/feature/add-deployment-k8s-yaml-files
Add run descheduler as deployment
2021-06-08 05:47:14 -07:00
Kubernetes Prow Robot
3843a2d5d1 Merge pull request #550 from hanumanthan/highnodeutilisation
Highnodeutilization strategy
2021-06-08 01:59:12 -07:00
Kubernetes Prow Robot
839a237d6a Merge pull request #581 from jsravn/patch-1
Remove namespace from ClusterRoleBinding
2021-06-07 07:16:40 -07:00
Hanu
4cd1e66ef3 Adding highnodeutilization strategy 2021-06-06 18:01:42 +08:00
Hanu
2f18864fa5 Refractor - Modify the common functions to be used by high utilisation 2021-06-06 18:00:43 +08:00
Hanu
6e71068f4f Refractoring lownodeutilization - extracting common functions 2021-06-06 18:00:29 +08:00
James Ravn
e40620effa Remove namespace from ClusterRoleBinding
It's not namespace scoped. This breaks some tools like kpt.
2021-06-04 11:04:14 +01:00
Kubernetes Prow Robot
d7dc0abd7b Merge pull request #576 from a7i/amira/topology-spread-label-filter
Filter pods by labelSelector during eviction for TopologySpreadConstraint strategy
2021-06-01 23:38:37 -07:00
Amir Alavi
012ca2398f Filter pods by labelSelector during eviction for TopologySpreadConstraint strategy 2021-06-01 15:42:23 -04:00
Amir Alavi
f07089d7b3 Bump Helm Chart, kind, and Kubernetes version for helm-test 2021-06-01 12:44:05 -04:00
Amir Alavi
a54b59f208 Use stable batch/v1 API Group for Kubernetes 1.21 2021-06-01 12:44:05 -04:00
Kubernetes Prow Robot
bfd5feaf60 Merge pull request #559 from RyanDevlin/nodeFit
Working nodeFit feature
2021-05-31 09:06:26 -07:00
RyanDevlin
41d46d0d3b Working nodeFit feature 2021-05-24 09:03:38 -04:00
Aditya Purandare
646c13ae15 Fix grammar and indentation issue for deployment resource 2021-05-21 12:42:51 -07:00
Kubernetes Prow Robot
3b9d3d9719 Merge pull request #563 from ingvagabund/removeduplicates-take-taints-into-account-for-node-count
RemoveDuplicates: take node taints, node affinity and node selector into account when computing a number of feasible nodes for the average occurence of pods per node
2021-05-21 05:38:46 -07:00
Aditya Purandare
449383caa3 Add run descheduler as deployment files and update README 2021-05-20 17:04:05 -07:00
Kubernetes Prow Robot
31fd097c0a Merge pull request #527 from pravarag/add-helm-test
Add helm test
2021-05-19 06:10:51 -07:00
Kubernetes Prow Robot
11143d5b2c Merge pull request #570 from KohlsTechnology/bump-kind-version
Bump kind version
2021-05-19 05:40:51 -07:00
Sean Malloy
8480e03e9c Fail unit and e2e tests on any errors 2021-05-19 00:35:16 -05:00
Sean Malloy
0397425010 Bump Kind To v0.11.0
This is required for running e2e tests for k8s v1.21.
2021-05-18 23:37:10 -05:00
Jan Chaloupka
5396282e3d RemoveDuplicates: take node taints, node affinity and node selector into account when computing a number of feasible nodes for the average occurence of pods per node
Nodes with taints which are not tolerated by evicted pods will never run the
pods. The same holds for node affinity and node selector.
So increase the number of pods per feasible nodes to decrease the
number of evicted pods.
2021-05-18 16:13:24 +02:00
Kubernetes Prow Robot
a9ff644de6 Merge pull request #568 from BinacsLee/binacs-pkg-descheduler-descheduler_test-fix-errorhandling
Add verify script for govet & fix pkg/descheduler/descheduler_test.go
2021-05-17 08:18:23 -07:00
BinacsLee
fe8e17f72c fix staticcheck failure for pkg/descheduler/descheduler_test.go 2021-05-17 23:07:12 +08:00
Kubernetes Prow Robot
a1709e9edd Merge pull request #567 from a7i/topology-taint-toleration
RemovePodsViolatingTopologySpreadConstraint : Take node's taints into consideration when balancing domains
2021-05-14 12:41:58 -07:00
Amir Alavi
24c0ca2ef9 Take node's taints into consideration when balancing domains 2021-05-14 15:23:58 -04:00
Kubernetes Prow Robot
9b26abd538 Merge pull request #565 from damemi/issue-564
Add test cases for soft constraints/multi constraints
2021-05-12 19:14:24 -07:00
Mike Dame
fc83c13166 Add test cases for soft constraints/multi constraints 2021-05-12 08:36:07 -04:00
Kubernetes Prow Robot
9b69962053 Merge pull request #535 from ingvagabund/e2e-refactor
E2e refactor
2021-05-11 06:33:36 -07:00
Jan Chaloupka
4edbecc85d Define NodeSelectorsEqual predicate 2021-05-09 18:08:32 +02:00
Jan Chaloupka
54f67266bb Define TolerationsEqual 2021-05-09 18:08:27 +02:00
Donggeun Lee
4ba48b018c Add tolerations to cronjob in helm chart 2021-05-04 19:56:08 +09:00
Kubernetes Prow Robot
2a3529c543 Merge pull request #560 from damemi/update-verify-messages
Update error messages in verify scripts to be more informative
2021-04-30 20:05:59 -07:00
Mike Dame
58408d710b Update error messages in verify scripts to be more informative 2021-04-30 16:56:16 -04:00
Kubernetes Prow Robot
161f66a12f Merge pull request #558 from KohlsTechnology/structured-logging
Use Structured Logging For Unknown Strategy Log Message
2021-04-28 23:28:50 -07:00
Sean Malloy
6bde95c9a1 Use Structured Logging For Unknown Strategy Log Message
Always use structured logging. Therefore update klog.Errorf() to instead
use klog.ErrorS().

Here is an example of the new log message.

E0428 23:58:57.048912 586 descheduler.go:145] "skipping strategy" err="unknown strategy name" strategy=ASDFPodLifeTime
2021-04-29 00:00:07 -05:00
Kubernetes Prow Robot
724ff8a188 Merge pull request #556 from damemi/change-main-loop
Invert main strategy loop for performance and customizability
2021-04-28 07:52:50 -07:00
Mike Dame
feae158a50 Invert main strategy loop for performance and customizability 2021-04-28 10:36:02 -04:00
Kubernetes Prow Robot
780ac7a51e Merge pull request #554 from BinacsLee/binacs-utils-predicates-cleanup
code cleanup: remove check on length
2021-04-26 07:19:01 -07:00
BinacsLee
c4afb6bb30 code cleanup: remove check on length 2021-04-25 21:44:20 +08:00
Pravar Agrawal
8b5c4e805d update docs with helm test info 2021-04-15 15:56:42 +05:30
Jan Chaloupka
f4e24a408f Drop klog 2021-04-14 09:03:38 +02:00
Jan Chaloupka
2781106d49 TestEvictAnnotation: replace LowNodeUtilization strategy with PodLifetime
PodLifetime is simpler in validating results
2021-04-14 09:03:31 +02:00
Jan Chaloupka
534a30a058 e2e: deleteRC: replace loop with wait.PollImmediate 2021-04-14 09:03:26 +02:00
Jan Chaloupka
bb55741320 Update vendor 2021-04-14 09:03:21 +02:00
Jan Chaloupka
079bd6157b e2e: TestLowNodeUtilization: normalize nodes before running the strategy 2021-04-14 09:03:15 +02:00
Pravar Agrawal
92cb1b23ed add helm-test configurations 2021-04-14 10:29:31 +05:30
Kubernetes Prow Robot
832facc526 Merge pull request #537 from KohlsTechnology/docs-0.21.0
Update Docs and Manifests for v0.21.0
2021-04-13 08:24:52 -07:00
Kubernetes Prow Robot
c4fa6c472f Merge pull request #548 from lx1036/feature/fix-testcase
Updating policy api version used in pod evictor
2021-04-13 08:12:50 -07:00
Xiang Liu
a848dac3cf Updating policy api version used in pod evictor 2021-04-13 11:02:01 +08:00
Kubernetes Prow Robot
43a2ccf9c4 Merge pull request #546 from ingvagabund/add-diagram
Add diagram of strategies
2021-04-12 13:38:15 -07:00
Jan Chaloupka
60cf3aeb95 Add diagram of strategies 2021-04-12 09:51:54 +02:00
Kubernetes Prow Robot
84b174e841 Merge pull request #547 from KohlsTechnology/bump-1.21.0
Bump To k8s 1.21.0
2021-04-08 23:00:48 -07:00
Sean Malloy
40337d064d Bump To k8s 1.21.0 2021-04-08 23:36:06 -05:00
Kubernetes Prow Robot
9fe585c854 Merge pull request #545 from pravarag/add-verify-script-deep-copies
Add verify script for deep-copies generator
2021-04-08 21:20:35 -07:00
Pravar Agrawal
4fce2ca2f1 add verify script for deep-copies gen 2021-04-08 22:51:24 +05:30
Kubernetes Prow Robot
4c11de0403 Merge pull request #507 from pravarag/add-verify-scripts
Add verify scripts for make gen to run during PR
2021-04-07 21:41:41 -07:00
Pravar Agrawal
a9099efc45 add verify scripts for conversions gen
Signed-off-by: Pravar Agrawal <pravaag1@in.ibm.com>
2021-04-08 09:56:32 +05:30
Kubernetes Prow Robot
6edb644f2e Merge pull request #544 from ingvagabund/lnu-improve-node-usage-logging
LNU: improve nodeUsage logging
2021-04-06 23:45:53 -07:00
Jan Chaloupka
c239e1199f LNU: improve nodeUsage logging
To avoid:
```
I0210 11:56:04.137956 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]}
I0210 11:56:04.138829 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]}
I0210 11:56:04.139044 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]}
```
2021-04-06 09:43:23 +02:00
Kubernetes Prow Robot
b713b7852a Merge pull request #434 from ZongqiangZhang/extend-resources
Support extended resources in LowNodeUtilization
2021-04-06 00:21:34 -07:00
Kubernetes Prow Robot
5d07d0c8e2 Merge pull request #543 from gaurav1086/e2e_test_fix_goroutine_leak
e2e_test: fix goroutine leak
2021-04-05 18:11:19 -07:00
Gaurav Singh
7076ba0760 e2e_test: fix goroutine leak 2021-04-04 21:19:56 -04:00
ZongqiangZhang
81b816d4a4 support extended resources in lownodeutilization 2021-04-02 21:37:51 +08:00
Kubernetes Prow Robot
9ebc909c7f Merge pull request #541 from KohlsTechnology/make-gen
Update Generated Code
2021-04-01 00:39:22 -07:00
Sean Malloy
af01b675b0 Update Generated Code
Ran "make gen" using Go 1.16.1. Some changes were merged, but "make gen"
was not run. This fixes the problem.

See below PR for reference:
https://github.com/kubernetes-sigs/descheduler/pull/523
2021-04-01 00:28:20 -05:00
Kubernetes Prow Robot
ce6ce5a058 Merge pull request #539 from damemi/1.21-rc.0
Bump to k8s 1.21-rc.0
2021-03-31 16:55:21 -07:00
Mike Dame
bd4f6d4fcd Bump to k8s 1.21-rc.0 2021-03-31 10:22:56 -04:00
Sean Malloy
6a4181158a Update Docs and Manifests for v0.21.0
* Added v0.21 references to README
* Update k8s manifests with v0.21.0 references
* Added table with list of supported architectures by release
2021-03-31 00:53:19 -05:00
Kubernetes Prow Robot
a2746d09e8 Merge pull request #523 from RyanDevlin/evict-critical
Added EvictSystemCriticalPods flag to descheduler
2021-03-30 10:41:57 -07:00
RyanDevlin
b5d7219391 Completed evictSystemCriticalPods feature 2021-03-29 23:13:05 -04:00
Kubernetes Prow Robot
b09d5d99dc Merge pull request #534 from KohlsTechnology/kustomize-docs
Use Tags In Kustomize Documentation
2021-03-27 23:46:44 -07:00
Sean Malloy
dbcc20f37f Use Tags In Kustomize Documentation
The master branch always represents the next release of the
descheduler. Therefore applying the descheduler k8s manifests
from the master branch is not considered stable. It is best for
users to install descheduler using the released tags.
2021-03-27 01:21:08 -05:00
Kubernetes Prow Robot
51340b56b8 Merge pull request #533 from ingvagabund/bump-to-go1.16
Bump go to 1.16
2021-03-26 23:20:43 -07:00
Jan Chaloupka
160669817e Bump go to 1.16 2021-03-25 10:13:40 +01:00
Kubernetes Prow Robot
6ca4479892 Merge pull request #520 from KohlsTechnology/statefulset-docs
Document That Descheduler Considers StatefulSets For Eviction
2021-03-23 08:15:36 -07:00
Sean Malloy
92740a25d4 Add Initial Unit Tests For StatefulSets 2021-03-23 09:24:08 -05:00
Sean Malloy
56e4daccaf Document That Descheduler Considers StatefulSets For Eviction
Similar to ReplicaSet, ReplicationController, and Jobs pods with a
StatefulSet metadata.ownerReference are considered for eviction.
Document this, so that it is clear to end users.
2021-03-23 09:24:08 -05:00
Kubernetes Prow Robot
b546832b66 Merge pull request #526 from KohlsTechnology/chart-docs-fix
Correct Helm Chart Docs For Container Requests and Limits
2021-03-15 08:59:05 -07:00
Kubernetes Prow Robot
39e5b34af3 Merge pull request #525 from damemi/topology-spread-selector-fix
(TopologySpread) Evict pods with selectors that match multiple nodes
2021-03-14 01:35:04 -08:00
Sean Malloy
e699e08d13 Correct Helm Chart Docs For Container Requests and Limits
The resources.cpuRequest and resources.memoryRequest varialbes are
not valid in the helm chart values.yaml file. The correct varialbe
name for setting the requests and limits is resources.

Also, fixed white space alignment in the markdown table.
2021-03-12 23:17:02 -06:00
Mike Dame
af26b57e5e (TopologySpread) Evict pods with selectors that match multiple nodes 2021-03-12 13:41:17 -05:00
Kubernetes Prow Robot
22fe589ae6 Merge pull request #508 from KohlsTechnology/emeritus-approvers
Move Inactive Maintainers to Emeritus Status
2021-03-12 09:06:18 -08:00
Kubernetes Prow Robot
0a11b5a138 Merge pull request #521 from KohlsTechnology/armv7
Enable ARM32v7 Container Image Builds
2021-03-09 17:07:13 -08:00
Sean Malloy
363f02710b Enable ARM32v7 Container Image Builds
Add ARM32v7 in addition to the currently supported architectures. This
will allow running descheduler on Raspberry Pi v3 devices.
2021-03-09 09:03:58 -06:00
Kubernetes Prow Robot
6abfa232e7 Merge pull request #518 from damemi/sigs-k8s-mdtoc
Add table of contents generator/verify script
2021-03-08 12:06:59 -08:00
Mike Dame
bbfb12a120 Run hack/update-toc.sh 2021-03-08 14:39:08 -05:00
Mike Dame
5df2a0c516 Add hack scripts and makefile targets 2021-03-08 14:39:08 -05:00
Mike Dame
8ecd14289a Add <toc> markers to README 2021-03-08 10:51:55 -05:00
Mike Dame
131ed42a4c Add sigs.k8s.io/mdtoc dependency 2021-03-08 10:46:27 -05:00
Kubernetes Prow Robot
6b8d4cd5a7 Merge pull request #517 from lixiang233/fix_topology_log
Correct log in topology spread strategy
2021-03-07 11:31:42 -08:00
Kubernetes Prow Robot
24a06511a2 Merge pull request #505 from ingvagabund/collect-metrics
Collect metrics
2021-03-06 21:41:42 -08:00
lixiang
09c7d1be0a Correct log in topology spread strategy 2021-03-06 16:21:46 +08:00
Kubernetes Prow Robot
f5666882de Merge pull request #515 from damemi/clarify-pod-percentage-readme
Clarify resource percentage calculation in README
2021-03-05 07:36:22 -08:00
Jan Chaloupka
701f22404b Serve secure metrics at 10258 2021-03-05 16:35:45 +01:00
Mike Dame
d5fa60bdd5 Clarify resource percentage calculation in README 2021-03-05 09:34:04 -05:00
Kubernetes Prow Robot
9e14f733b7 Merge pull request #510 from lixiang233/Ft_filter_by_label
Filter pods by labelSelector
2021-03-05 01:14:22 -08:00
lixiang
29ade13ce7 README and e2e-testcase add for labelSelector 2021-03-04 21:32:14 +08:00
lixiang
03518badb8 Strategies: Add labelSelector to all strategies except LowNodeUtilization, RemoveDuplicates and RemovePodsViolatingTopologySpreadConstraint. 2021-03-04 21:30:50 +08:00
Jan Chaloupka
24458fb0ca Increase pods_evicted metric 2021-03-03 16:15:21 +01:00
Jan Chaloupka
1c5b32763b Register metrics
New metrics:
- build_info: Build info about descheduler, including Go version, Descheduler version, Git SHA, Git branch
- pods_evicted: Number of successfully evicted pods, by the result, by the strategy, by the namespace
2021-03-03 16:15:09 +01:00
Jan Chaloupka
3bd031bbb3 Move build's versioning bits under pkg/version 2021-03-03 15:56:53 +01:00
Jan Chaloupka
ea6e9f22b9 Vendor metrics 2021-03-03 15:56:48 +01:00
Kubernetes Prow Robot
73309a3948 Merge pull request #490 from damemi/logging-format-default
Add default logging-format value
2021-03-03 06:49:20 -08:00
Kubernetes Prow Robot
105313a0e3 Merge pull request #512 from ingvagabund/bump-gogo-protobuf-to-1.3.2
Bump github.com/gogo/protobuf to v1.3.2
2021-03-02 07:05:19 -08:00
Jan Chaloupka
d368cbed32 Bump github.com/gogo/protobuf to v1.3.2
Fixes https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3121
2021-03-02 14:48:32 +01:00
Sean Malloy
5f4dfbc922 Update Maintainer Field in Dockerfiles
Set the maintainer field to use the more generic SIG scheduling name and
email address.
2021-03-01 23:19:42 -06:00
Sean Malloy
967911e4c1 Move Inactive Maintainers to Emeritus Status
For roughly the past year damemi has been the only active approver for
the descheduler. Therefore move the inactive approvers to emeritus
status. This will help clarify to contributors who should be assigned to
pull requests.
2021-03-01 09:23:49 -06:00
Kubernetes Prow Robot
726091712d Merge pull request #509 from KohlsTechnology/update-reviwers
Add lixiang233 As A Reviewer
2021-03-01 06:45:25 -08:00
lixiang
854afa7c73 PodsListing: Add WithLabelSelector option. 2021-03-01 11:13:44 +08:00
lixiang
2517268b1f API: Add a new parameter labelSelector to StrategyParameters. 2021-02-26 15:49:00 +08:00
Sean Malloy
fea8beabab Add lixiang233 As A Reviewer 2021-02-25 22:03:50 -06:00
Kubernetes Prow Robot
7f9c95fa16 Merge pull request #506 from KohlsTechnology/kind-0.10.0
Update e2e tests to use kind v0.10.0
2021-02-25 07:29:27 -08:00
Sean Malloy
6f7e2a271e Update e2e tests to use kind v0.10.0 2021-02-25 00:17:06 -06:00
Kubernetes Prow Robot
217ebdfa73 Merge pull request #504 from lixiang233/Fix_lowNodeUtilization_log
Log and README optimization for LowNodeUtilization
2021-02-24 05:22:50 -08:00
lixiang
e014fda58e Log and README optimization for LowNodeUtilization 2021-02-24 09:17:07 +08:00
Kubernetes Prow Robot
6d0360fd16 Merge pull request #502 from damemi/node-affinity-totalevicted
Move 'total pods evicted' log message to main loop
2021-02-19 13:45:41 -08:00
Mike Dame
01a87b6143 Move 'total pods evicted' log message to main loop 2021-02-19 10:37:43 -05:00
Kubernetes Prow Robot
f3e871492c Merge pull request #499 from KohlsTechnology/go-1.15.8
Bump To Go 1.15.8
2021-02-18 05:52:57 -08:00
Kubernetes Prow Robot
bf8d744686 Merge pull request #498 from kristinnardal2/patch-1
Fix indentation in values.yaml
2021-02-18 05:52:51 -08:00
Kubernetes Prow Robot
477d12968e Merge pull request #496 from KohlsTechnology/nonroot-helm-chart
Update Helm Chart to run as non-root
2021-02-18 00:34:51 -08:00
Sean Malloy
dcb4136a96 Bump To Go 1.15.8
The main k/k repo was updated to Go 1.15.8 for the upcoming k8s v1.21.0
release. See below PR for reference.

https://github.com/kubernetes/release/issues/1895
2021-02-18 00:00:31 -06:00
Kristinn Björgvin Árdal
2c0afafccf Fix indentation in values.yaml
A couple of lines had an extra whitespace.
2021-02-17 08:51:26 +01:00
Kubernetes Prow Robot
e56617253f Merge pull request #497 from ingvagabund/resourceUsagePercentages-unit-test
LowNodeUtilization: unit test resourceUsagePercentages to validate percentages are computed correctly
2021-02-11 20:26:48 -08:00
Jan Chaloupka
500aaea4dd LowNodeUtilization: unit test resourceUsagePercentages to validate percentages are computed correctly 2021-02-11 16:04:37 +01:00
Sean Malloy
4444811f26 Update Helm Chart to run as non-root 2021-02-10 21:49:01 -06:00
Kubernetes Prow Robot
b63c6fac27 Merge pull request #495 from KohlsTechnology/nonroot
Update Job and CronJob YAML to run as non-root
2021-02-10 08:22:59 -08:00
Sean Malloy
dfc76906d4 Update Job and CronJob YAML to run as non-root 2021-02-09 23:10:18 -06:00
Kubernetes Prow Robot
fbd17d4caf Merge pull request #480 from damemi/nodeselector-cronjob
Add nodeSelector to cronJob helm chart
2021-02-09 10:32:59 -08:00
Mike Dame
9c01589fb9 Add nodeSelector to cronJob helm chart 2021-02-09 13:27:26 -05:00
Kubernetes Prow Robot
a0942afaa1 Merge pull request #493 from fancc/resource_usage_percentage
Modify resourceUsage.Value() to resourceUsage.MilliValue()
2021-02-02 03:38:28 -08:00
范成城
16fa21a4a6 change resourceUsagePercentage func 2021-01-31 20:46:46 +08:00
Kubernetes Prow Robot
241f1325c9 Merge pull request #481 from damemi/ignore-pvc-pods
Add option to ignore pods with PVCs from eviction
2021-01-26 22:41:40 -08:00
Mike Dame
c1a63a557a Add option to ignore pods with PVCs from eviction 2021-01-26 08:47:54 -05:00
Mike Dame
e45e21368a Add default logging-format value 2021-01-25 14:19:58 -05:00
Kubernetes Prow Robot
f24b367479 Merge pull request #474 from lixiang233/Ft_include_soft_constraints
Add a parameter to include soft topology spread constraints
2021-01-21 09:21:06 -08:00
lixiang
8ba9cb1df7 Add a parameter to include soft topology spread constraints 2021-01-21 11:33:16 +08:00
Kubernetes Prow Robot
d502f05910 Merge pull request #484 from damemi/topology-spread-bug
Fix TopologySpread bug that evicts non-evictable pods
2021-01-12 17:26:36 -08:00
Mike Dame
241f47d947 Fix TopologySpread bug that evicts non-evictable pods 2021-01-12 15:25:55 -05:00
Kubernetes Prow Robot
19424f4119 Merge pull request #482 from damemi/fix-logging-gen
Add Logging field to v1alpha1 componentconfig
2021-01-11 09:44:25 -08:00
Mike Dame
635a40f305 Add Logging field to v1alpha1 componentconfig 2021-01-11 11:21:32 -05:00
Kubernetes Prow Robot
1804d2e3a2 Merge pull request #478 from ryuichi1208/master
Update README.md
2021-01-10 11:43:05 -08:00
Ryuichi Watanabe
fea4870243 Update README.md 2021-01-01 18:45:00 +09:00
Kubernetes Prow Robot
f54df67d11 Merge pull request #471 from Mathew857/master
refactor: remove unused code
2020-12-15 17:20:20 -08:00
wu.chaozong
1a998037f8 refactor: remove unuse param in e2e_test.go 2020-12-16 08:46:40 +08:00
wu.chaozong
c481877c03 refactor: update node_test file 2020-12-15 21:15:33 +08:00
Kubernetes Prow Robot
67df39690b Merge pull request #468 from eatwithforks/jylee/user
Run as user 1000 so pod can be runAsNonRoot for security purposes
2020-12-14 06:43:28 -08:00
wu.chaozong
674f14da78 refactor: remove unused code 2020-12-13 00:09:07 +08:00
Jye Lee
0cfbdf642b Run as user 1000 so pod can be runAsNonRoot for security purposes 2020-12-10 09:37:49 -08:00
Kubernetes Prow Robot
c86d1c7eb2 Merge pull request #463 from ingvagabund/duplicates-uniformly
RemoveDuplicatePods: evict uniformly
2020-12-10 07:14:13 -08:00
Jan Chaloupka
f67c265533 RemoveDuplicatePods: evict uniformly 2020-12-10 15:33:04 +01:00
Kubernetes Prow Robot
969921640c Merge pull request #465 from damemi/skip-balanced-topology
Skip topology calculations if domains are already balanced
2020-12-09 19:48:53 -08:00
Mike Dame
0273fd7597 Skip topology calculations if domains are already balanced 2020-12-09 11:29:59 -05:00
Kubernetes Prow Robot
e84d0c5587 Merge pull request #464 from damemi/1.20-bump
Bump to v0.20.0
2020-12-09 07:12:50 -08:00
Kubernetes Prow Robot
5267ec407c Merge pull request #462 from KohlsTechnology/docs-v0.20.0
Update Docs and Manifests for v0.20.0
2020-12-09 07:04:50 -08:00
Mike Dame
6714d8e0b7 Bump to v0.20.0 2020-12-09 09:53:03 -05:00
Sean Malloy
b3439eab41 Update Docs and Manifests for v0.20.0
* Added v0.20 references to README
* Update k8s manifests with v0.20.0 references
2020-12-09 00:10:50 -06:00
Kubernetes Prow Robot
509118587a Merge pull request #453 from dieterdemeyer/master
Allow setting options successfulJobsHistoryLimit and failedJobsHistoryLimit for cronjob
2020-12-08 14:33:33 -08:00
Kubernetes Prow Robot
f482537dff Merge pull request #455 from AmoVanB/fix/cluster-role-tsc
Helm: allow the topologySpreadConstraint strategy to list namespaces
2020-12-08 11:17:35 -08:00
Amaury Van Bemten
0f95817746 add permission to list namespaces for the topologyspread constraint strategy 2020-12-08 17:11:34 +01:00
Kubernetes Prow Robot
70d1fadae7 Merge pull request #454 from damemi/topology-spread-nsfix
Fix broken namespace logic in TopologySpreadConstraint
2020-12-04 09:34:00 -08:00
Mike Dame
499beb2fd7 Fix broken namespace logic in TopologySpreadConstraint 2020-12-04 10:49:58 -05:00
Kubernetes Prow Robot
de24f3854b Merge pull request #449 from KohlsTechnology/arm-image
Enable Multi-Arch Container Image Builds
2020-12-03 17:55:25 -08:00
Kubernetes Prow Robot
a5e8ba1a70 Merge pull request #452 from KohlsTechnology/go-1.15.5
Bump To Go 1.15.5
2020-12-03 06:21:00 -08:00
Dieter De Meyer
45ad48042f Allow setting options successfulJobsHistoryLimit and failedJobsHistoryLimit for cronjob 2020-12-03 11:56:44 +01:00
Sean Malloy
550de966c7 Bump To Go 1.15.5
The main k/k repo as updated to Go 1.15.5 for the upcoming k8s v1.20.0
release. See below PR for reference.

https://github.com/kubernetes/kubernetes/pull/95776
2020-12-03 01:16:42 -06:00
Kubernetes Prow Robot
c94342db31 Merge pull request #451 from damemi/v0.20.0-rc.0
bump k8s dependencies to 1.20-rc.0
2020-12-02 23:13:00 -08:00
Sean Malloy
94f1c7dd8d Document Multi-Arch Container Image Usage 2020-12-03 00:44:19 -06:00
Sean Malloy
bf91e6790e Enable Multi-Arch Container Image Builds
Previous to this change official descheduler container images only
supported the AMD64 hardware architecture. This change enables
building official descheduler container images for multiple
architectures.

The initially supported architectures are AMD64 and ARM64.
2020-12-03 00:06:22 -06:00
Mike Dame
251f44e568 bump(*): k8s to 1.20-rc.0 2020-12-02 14:40:50 -05:00
Kubernetes Prow Robot
922c4f6a63 Merge pull request #448 from damemi/topology-spread-logging
Add more topology spread logs
2020-12-01 15:10:50 -08:00
Mike Dame
2b5ec01381 Add more topology spread logs 2020-12-01 17:02:46 -05:00
Kubernetes Prow Robot
7bcd562ff5 Merge pull request #444 from KohlsTechnology/issue-432-cronjob
Update Helm Chart To Allow Setting startingDeadlineSeconds
2020-11-24 12:10:40 -08:00
Sean Malloy
03852d0914 Update Helm Chart To Allow Setting startingDeadlineSeconds
Allow end users to optionally set the descheduler CronJob
.spec.startingDeadlineSeconds when installing using helm.
2020-11-24 10:58:16 -06:00
Kubernetes Prow Robot
5f1e9a97c4 Merge pull request #446 from ingvagabund/low-node-util-percentages-times-100
LowNodeUtilization: express usagePercentage multiplied by 100
2020-11-24 08:13:02 -08:00
Jan Chaloupka
cd6f2cd4cb LowNodeUtilization: express usagePercentage multiplied by 100
Entry params are in interval <0; 100>. Have logs respect that as well.
2020-11-24 14:48:48 +01:00
Kubernetes Prow Robot
e679c7fabc Merge pull request #442 from KohlsTechnology/issue-432
Set Container Resources In YAML Manifests
2020-11-23 08:49:34 -08:00
Kubernetes Prow Robot
6f5918d765 Merge pull request #443 from KohlsTechnology/issue-432-helm
Set Container Resources In Helm Chart
2020-11-23 07:47:33 -08:00
Sean Malloy
5a46ba0630 Set Container Resources In Helm Chart
Prior to this commit the helm chart used to install the descheduler
CronJob did not set container requests or limits. This is considered
an anti-pattern when deploying applications on k8s.

Set descheduler container resources to make it a burstable pod. This
will ensure a high quality experience for end users when deploying
descheduler into their clusters using the instructions from the README.

The default values choosen for CPU/Memory are not based on any real data.
2020-11-23 09:12:54 -06:00
Kubernetes Prow Robot
c1323719f4 Merge pull request #427 from KohlsTechnology/fix-version-output
Fix Version Output For Automated Container Builds
2020-11-21 03:19:33 -08:00
Sean Malloy
8795fe6b90 Fix Version Output For Automated Container Builds
Prior to this change the output from the command "descheduler version"
when run using the official container images from k8s.gcr.io would
always output an empty string. See below for an example.

```
docker run k8s.gcr.io/descheduler/descheduler:v0.19.0 /bin/descheduler version
Descheduler version {Major: Minor: GitCommit: GitVersion: BuildDate:2020-09-01T16:43:23+0000 GoVersion:go1.15 Compiler:gc Platform:linux/amd64}
```

This change makes it possible to pass the descheduler version
information to the automated container image build process and
also makes it work for local builds too.
2020-11-20 23:25:22 -06:00
Sean Malloy
a3f8bb0369 Set Container Resources In YAML Manifests
Prior to this commit the YAML manifests used to install the descheduler
Job and CronJob did not set container requests or limits. This is
considered an anti-pattern when deploying applications on k8s.

Set descheduler container resources to make it a burstable pod. This
will ensure a high quality experience for end users when deploying
descheduler into their clusters using the instructions from the README.

The values choosen for CPU/Memory are not based on any real data.
2020-11-20 12:55:10 -06:00
Kubernetes Prow Robot
cd3c3bf4da Merge pull request #441 from ingvagabund/low-node-util-dump-node-usage-in-percentage
LowNodeUtilization: show node usage in percentage as well
2020-11-18 08:44:06 -08:00
Jan Chaloupka
652ee87bf5 LowNodeUtilization: show node usage in percentage as well 2020-11-18 14:50:53 +01:00
Kubernetes Prow Robot
5225ec4597 Merge pull request #436 from stevehipwell/helm-chart-rename
Rename Helm chart
2020-11-17 22:42:05 -08:00
Kubernetes Prow Robot
b2af720ddb Merge pull request #438 from lixiang233/add_missing_parameter
Add missing parameter in README
2020-11-17 20:24:04 -08:00
lixiang
94f07996f7 add missing parameter in README 2020-11-17 16:10:02 +08:00
Kubernetes Prow Robot
4839d5f369 Merge pull request #413 from damemi/podtopologyspread
Add PodTopologySpread strategy
2020-11-13 10:41:05 -08:00
Steve Hipwell
022e07c278 Rename helm chart 2020-11-09 10:21:48 +00:00
Kubernetes Prow Robot
620d71abdf Merge pull request #433 from ankon/patch-1
Fix trivial typo in helm README
2020-11-03 08:50:04 -08:00
Andreas Kohn
85d00ab457 Fix trivial typo in helm README 2020-11-03 13:10:37 +01:00
Kubernetes Prow Robot
b30bd40860 Merge pull request #428 from damemi/component-helper-nodeselector
Start using helpers from k8s.io/component-helpers
2020-11-02 04:52:54 -08:00
Mayank Kumar
4108362158 Add RemovePodsViolatingTopologySpreadConstraint strategy
This adds a strategy to balance pod topology domains based on the scheduler's
PodTopologySpread constraints. It attempts to find the minimum number of pods
that should be sent for eviction by comparing the largest domains in a topology
with the smallest domains in that topology.
2020-10-30 16:36:33 -04:00
Mike Dame
b27dc5f14e go mod tidy && go mod vendor 2020-10-30 09:41:49 -04:00
Mike Dame
3c54378749 Use NodeSelectorRequirementsAsSelector from k8s.io/component-helpers 2020-10-30 09:41:49 -04:00
Kubernetes Prow Robot
6240aa68f7 Merge pull request #430 from damemi/verify-spelling
Add hack/verify-spelling.sh script and Makefile target
2020-10-22 08:27:36 -07:00
Mike Dame
301af7fd9c Add hack/verify-spelling.sh script and Makefile target 2020-10-22 10:20:53 -04:00
Kubernetes Prow Robot
41d529ebe2 Merge pull request #429 from invidian/invidian/fix-typo
pkg/descheduler/strategies: fix typo
2020-10-22 06:59:36 -07:00
Mateusz Gozdek
cc6bb633ba pkg/descheduler/pod: fix typo
Signed-off-by: Mateusz Gozdek <mgozdekof@gmail.com>
2020-10-22 09:25:55 +02:00
Mateusz Gozdek
31cf70c34c pkg/descheduler/strategies: fix typo
Signed-off-by: Mateusz Gozdek <mgozdekof@gmail.com>
2020-10-22 09:25:24 +02:00
Kubernetes Prow Robot
3399619395 Merge pull request #418 from invidian/psp
charts/descheduler: add PodSecurityPolicy support
2020-10-21 23:17:35 -07:00
Kubernetes Prow Robot
cfc4cce08b Merge pull request #421 from m3y/kustomize
Support for remote resources in kustomize
2020-10-21 01:02:20 -07:00
Mateusz Gozdek
f9e9f0654a charts/descheduler: add PodSecurityPolicy support
This commit adds restrictive PodSecurityPolicy, which can be
optionally created, so descheduler can be deployed on clusters with
PodSecurityPolicy admission controller, but which do not ship default
policies.

Signed-off-by: Mateusz Gozdek <mgozdekof@gmail.com>
2020-10-21 08:41:39 +02:00
m3y
73af0e84fa Support for remote resources in kustomize 2020-10-20 03:20:11 +09:00
Kubernetes Prow Robot
b33928ac91 Merge pull request #412 from farah/logging-format
Add logging-format flag
2020-10-12 10:58:48 -07:00
Ali Farah
3ac0c408de Add --logging-format flag
Add k8s.io/component-base/config package
2020-10-12 22:27:39 +11:00
Kubernetes Prow Robot
149f900811 Merge pull request #417 from plutzilla/master
Helm chart README fix
2020-10-07 22:46:15 -07:00
Paulius Leščinskas
9ede04ba9b Update README.md
`kube-system` namespace provided in the installation instruction.
2020-10-02 20:07:24 +03:00
Paulius Leščinskas
f9cbed8b71 Helm chart README fix
Helm 3 omits `--name` flag for release name.
2020-10-02 18:11:06 +03:00
Kubernetes Prow Robot
fa4da031e4 Merge pull request #416 from KohlsTechnology/go-1.15.2
Bump To Go 1.15.2
2020-10-02 07:09:20 -07:00
Sean Malloy
9511f308d0 Bump To Go 1.15.2 2020-10-02 01:02:36 -05:00
Kubernetes Prow Robot
52f43f0fcb Merge pull request #414 from KohlsTechnology/event-recorder-structured-logs
Update Event Logging to Use Structured Logging
2020-10-01 14:25:19 -07:00
Sean Malloy
4bb0ceeed5 Update Event Logging to Use Structured Logging 2020-10-01 00:50:37 -05:00
Kubernetes Prow Robot
279f648e9a Merge pull request #410 from lixiang233/Add_lowNodeUtilization_usecase
Add use case for lowNodeUtilization
2020-09-28 20:57:25 -07:00
Kubernetes Prow Robot
411ec740ff Merge pull request #411 from KohlsTechnology/klogv2
Convert Last Log Message To Structured Logging
2020-09-28 06:14:48 -07:00
Sean Malloy
6237ba5a43 Convert Last Log Message To Structured Logging
The k8s.io/klog/v2 package does not currently support structured logging
for warning level log messages. Therefore update the one call in the
code base using klog.Warningf to instead use klog.InfoS.
2020-09-25 22:58:28 -05:00
Kubernetes Prow Robot
5d65a9ad68 Merge pull request #409 from ingvagabund/flip-some-klog-info-to-error
Change klog info messages after a strategy is exited into error messages
2020-09-25 20:34:47 -07:00
Kubernetes Prow Robot
28f3f867c3 Merge pull request #407 from ingvagabund/structured-klog
Flip Info/Infof/Error to InfoS/ErrorS
2020-09-24 04:22:05 -07:00
lixiang
00f79aa28d add use case for lowNodeUtilization 2020-09-24 17:51:08 +08:00
lixiang
6042d717e9 delete disabled strategies in podLifeTime use case 2020-09-24 17:49:02 +08:00
Jan Chaloupka
7afa54519f Change klog info messages after a strategy is exited into error messages 2020-09-24 11:07:20 +02:00
Kubernetes Prow Robot
8c3a80fbf9 Merge pull request #406 from damemi/duplicates-namespace-filtering
Add Namespace filtering to RemoveDuplicates strategy
2020-09-22 18:16:08 -07:00
Mike Dame
11b9829885 Update README to include strategy params 2020-09-22 10:28:43 -04:00
Jan Chaloupka
4798559545 Flip Info/Infof/Error to InfoS/ErrorS 2020-09-21 09:08:11 +02:00
Mike Dame
8b34d6eb94 Add Namespace filtering to RemoveDuplicates strategy 2020-09-18 12:17:47 -04:00
Kubernetes Prow Robot
70700a1c97 Merge pull request #401 from KohlsTechnology/bump-kind
Update e2e tests to use kind v0.9.0
2020-09-15 09:58:07 -07:00
Sean Malloy
d7420eb945 Update e2e tests to use kind v0.9.0 2020-09-15 09:11:36 -05:00
Kubernetes Prow Robot
c9cfeb35c2 Merge pull request #384 from ingvagabund/refactor-low-node-utilization
Refactor low node utilization
2020-09-13 19:04:58 -07:00
Kubernetes Prow Robot
fda63a816f Merge pull request #397 from farah/farah/add-structured-logging
Convert logs to use structured logs
2020-09-13 18:46:57 -07:00
Ali Farah
6329b6c27b Convert logs to use structured logs 2020-09-12 14:46:16 +10:00
Jan Chaloupka
9b4f781c5c Be verbose about unschedulable nodes which are not considered as underutilized 2020-09-11 12:57:24 +02:00
Jan Chaloupka
63039fcfd6 Compute utilization absolutely, not relatively 2020-09-11 12:57:22 +02:00
Kubernetes Prow Robot
d25f3757d6 Merge pull request #393 from lixiang233/Ft_custom_pod_phase_PodLifeTime
PodLifeTime: allow custom podStatusPhases
2020-09-11 03:46:14 -07:00
lixiang
1303fe6eb9 PodLifeTime: allow custom podStatusPhases 2020-09-11 09:56:45 +08:00
Kubernetes Prow Robot
1682cc9462 Merge pull request #394 from farah/farah/add-structured-logging
Convert logs to use structured logging
2020-09-09 10:15:08 -07:00
Kubernetes Prow Robot
605927676f Merge pull request #395 from damemi/antiaffinity-evictable
Move IsEvictable check in PodAntiAffinity
2020-09-09 07:45:52 -07:00
Jan Chaloupka
dc41e6a41c Remove createNodePodsMap 2020-09-09 16:29:11 +02:00
Ali Farah
e37c27313e Convert logs to use structured logging 2020-09-10 00:22:24 +10:00
Mike Dame
e5d9756ebe Move IsEvictable check in PodAntiAffinity
While non-evictable pods should never be evicted, they should still be
considered when calculating PodAntiAffinity violations. For example, you
may have an evictable pod that should not be running next to a system-critical
static pod. We currently filter IsEvictable before checking for Affinity violations,
so this case would not be caught.
2020-09-09 09:46:50 -04:00
Kubernetes Prow Robot
e6f1c6f78a Merge pull request #392 from KohlsTechnology/helm-hub
Add Link To Helm Hub
2020-09-03 06:59:41 -07:00
Sean Malloy
fceebded6d Add Link To Helm Hub
Updated the README with the link to the official descheduler helm chart
on https://hub.helm.sh. This makes it easier for end users to install the
desceduler using helm.
2020-09-02 23:55:27 -05:00
Kubernetes Prow Robot
08b2dffa42 Merge pull request #376 from farah/farah/add-structured-logging
Change klog to use structured logging
2020-09-02 09:49:06 -07:00
Kubernetes Prow Robot
745e29959c Merge pull request #388 from damemi/helm-chart-1.19
Update Helm chart to v0.19.0
2020-09-01 09:41:53 -07:00
Kubernetes Prow Robot
aa1bab2c4a Merge pull request #389 from KohlsTechnology/docs-0.19
Update Docs and Manifests for v0.19.0
2020-08-31 12:59:50 -07:00
Sean Malloy
19c3e02b44 Update Docs and Manifests for v0.19.0
* Added v0.19 references to README
* Update k8s manifests with v0.19.0 references
2020-08-31 14:41:53 -05:00
Mike Dame
a45057200f Update Helm chart to v0.19.0 2020-08-31 15:39:26 -04:00
Kubernetes Prow Robot
74d6be3943 Merge pull request #371 from KohlsTechnology/go-1.15
Update To Go 1.15.0
2020-08-31 12:22:43 -07:00
Sean Malloy
1fb3445692 Fix golangci-lint Failures For 1.30.0 Upgrade 2020-08-31 14:03:43 -05:00
Sean Malloy
195082d33b Update To Go 1.15.0
As part of the k8s 1.19 release cycle the Go version is being bumped to
1.15.0. Updating the descheduler to use Go 1.15 prior to the descheduler
v0.19.0 release.

See below issues for reference:
* https://github.com/kubernetes/release/issues/1421
* https://github.com/kubernetes/kubernetes/issues/93484
2020-08-31 14:03:43 -05:00
Kubernetes Prow Robot
03dbc93961 Merge pull request #385 from ingvagabund/low-node-utilization-use-clientset-in-testing
LowNodeUtilization: use clientset in testing, drop all custom reactors
2020-08-31 11:48:44 -07:00
Jan Chaloupka
d27f64480b LowNodeUtilization: use clientset in testing, drop all custom reactors 2020-08-31 20:38:01 +02:00
Kubernetes Prow Robot
5645663b71 Merge pull request #387 from KohlsTechnology/contrib-docs
Add KUBECONFIG Export To Contributing Docs
2020-08-31 06:34:28 -07:00
Kubernetes Prow Robot
dbc8092282 Merge pull request #386 from KohlsTechnology/bump-1.19
Bump k8s Modules For k8s 1.19
2020-08-31 06:34:20 -07:00
Ali Farah
50d2b246d9 Change klog to use structured logging
Signed-off-by: Ali Farah <aliyfarah9@gmail.com>
2020-08-31 14:30:08 +10:00
Sean Malloy
6220aca03e Add KUBECONFIG Export To Contributing Docs 2020-08-28 00:24:29 -05:00
Sean Malloy
674993d23a Update Vendor Deps For k8s 1.19 2020-08-27 23:49:48 -05:00
Sean Malloy
f4c3f9b18f Bump k8s Modules For k8s 1.19 2020-08-27 23:48:58 -05:00
Kubernetes Prow Robot
d65a7c4783 Merge pull request #380 from ingvagabund/move-some-flags-under-descheduler-polic
Deprecate node-selector, max-pods-to-evict-per-node and evict-local-storage-pods flags and promote then to policy v1alpha1 fields
2020-08-21 05:15:39 -07:00
Jan Chaloupka
89541f7545 Deprecate node-selector, max-pods-to-evict-per-node and evict-local-storage-pods flags and promote then to policy v1alpha1 fields 2020-08-21 13:27:40 +02:00
Kubernetes Prow Robot
17f769c1c1 Merge pull request #382 from damemi/readme-toc
Add table of contents to README
2020-08-20 12:36:51 -07:00
Mike Dame
eb9e62f047 Add table of contents to README 2020-08-20 15:24:41 -04:00
Kubernetes Prow Robot
6ccd80f2ee Merge pull request #372 from ingvagabund/isevictable
Redefine IsEvictable to be customizable for a particular strategy
2020-08-19 02:53:11 -07:00
Jan Chaloupka
d8251b9086 Redefine IsEvictable to be customizable for a particular strategy
Use WithXXX methods to extend the list of constraints to also
provide a reasonable error message.
2020-08-19 11:21:26 +02:00
Kubernetes Prow Robot
4cd1f45d90 Merge pull request #375 from KohlsTechnology/helm-chart-update
Update Maintainer Details In Helm Chart
2020-08-17 06:42:19 -07:00
Sean Malloy
2dc3f53a13 Update Maintainer Details In Helm Chart
Changing the maintainer for the helm chart to SIG scheduling instead of
an individual person.
2020-08-13 22:15:18 -05:00
Kubernetes Prow Robot
f5d8a02f79 Merge pull request #374 from ingvagabund/namespace-as-pointer
Promote Namespaces field to a pointer
2020-08-13 19:04:21 -07:00
Jan Chaloupka
a7c51ffae0 Promote Namespaces field to a pointer 2020-08-13 18:38:46 +02:00
Kubernetes Prow Robot
9746fd300f Merge pull request #364 from lixiang233/ft_allow_custom_priority_threshold
Allow custom priority threshold
2020-08-12 07:35:45 -07:00
lixiang
b5e17f91cd ClusterRole: add permission to access priorityclasses.scheduling.k8s.io 2020-08-12 13:55:53 +08:00
lixiang
f5524153ba README: add description for priority threshold 2020-08-12 13:55:53 +08:00
lixiang
6d693d06fb e2e: add testcase for priority threshold 2020-08-12 13:55:53 +08:00
lixiang
4d7a6ee9be e2e: Add priority and priority class to runPodLifetimeStrategy and RcByNameContainer 2020-08-12 13:55:53 +08:00
lixiang
0fdaac6042 Strategy: Set threshold priority from strategy's parameters 2020-08-12 13:54:57 +08:00
Kubernetes Prow Robot
bb7ab369d7 Merge pull request #370 from damemi/1.19-rc.4
Bump k8s dependencies to 1.19-rc.4
2020-08-11 06:42:16 -07:00
Mike Dame
d96cca2221 go mod tidy && go mod vendor 2020-08-11 09:32:00 -04:00
Mike Dame
6ee87d9d7c Bump(k8s.io): to 1.19-rc.4 2020-08-11 09:31:43 -04:00
lixiang
95ce2a4ff7 PodEvictor: add a new param thresholdPriority to IsEvictable 2020-08-11 09:57:26 +08:00
Kubernetes Prow Robot
19e1387bf1 Merge pull request #369 from damemi/duplicates-panic
Add check for ownerref length in DuplicatePods strategy
2020-08-10 07:06:20 -07:00
Mike Dame
ec4c5bed5d Add check for ownerref length in DuplicatePods strategy
This fixes a panic that occurs if a pod has no ownerrefs
later on in the strategy. If a pod has no ownerrefs, there's
nothing for us to do with it.
2020-08-10 09:37:05 -04:00
lixiang
ae38aa63af StrategyParameters: Add new param ThresholdPriority and ThresholdPriorityClassName 2020-08-07 13:53:29 +08:00
Kubernetes Prow Robot
cdcd677aa0 Merge pull request #366 from lixiang233/podAntiAffinity_add_missing_validation
Add missing validation in PodAntiAffinity
2020-07-30 07:34:31 -07:00
lixiang
a5eb9fc36d Add missing validation in PodAntiAffinity 2020-07-30 16:44:03 +08:00
Kubernetes Prow Robot
96efd2312b Merge pull request #363 from KohlsTechnology/new-registry-name
Update Container Registry to k8s.gcr.io
2020-07-29 08:29:48 -07:00
Sean Malloy
e7699c4f6b Update Container Registry to k8s.gcr.io
The k8s project recently cut over to the new official k8s.gcr.io
container registry. The descheduler image can now be pulled from
k8s.gcr.io. This is just a new DNS name for the container registry. The
previously documented DNS names for the registry still work, but
require more typing.
2020-07-28 22:45:06 -05:00
Kubernetes Prow Robot
d0fbebb77c Merge pull request #337 from damemi/rebase-1.19
Rebase k8s dependencies to 1.19-rc.2
2020-07-28 10:47:47 -07:00
Kubernetes Prow Robot
46bb5b6f55 Merge pull request #362 from damemi/only-release-tags
Update version parsing to exclude helm-chart tags
2020-07-28 09:31:48 -07:00
Kubernetes Prow Robot
b799ed074a Merge pull request #361 from jjmengze/patch-1
remove unnecessary line feed  in log messages
2020-07-28 09:19:48 -07:00
Kubernetes Prow Robot
eee41ee111 Merge pull request #338 from ingvagabund/filter-out-pods-by-namespaces
Filter pods by namespaces
2020-07-28 08:57:47 -07:00
MengZeLee
6ac81e0b9c remove unnecessary line feed in log messages
If we remove these \n, there will be no misspell errors in go report
2020-07-28 23:27:23 +08:00
Mike Dame
5970899029 Update version parsing to exclude helm-chart tags 2020-07-28 10:59:27 -04:00
Mike Dame
5bb0389538 Update API scheme registration for 1.19 2020-07-28 10:24:21 -04:00
Mike Dame
92cb6a378a Change gnostic/openapiv2 to lowercase in git 2020-07-27 13:55:31 -04:00
Mike Dame
b09932e92a go mod tidy && go mod vendor 2020-07-27 13:55:29 -04:00
Mike Dame
63603f38d6 Pin k8s deps to 1.19-rc.2 2020-07-27 13:55:13 -04:00
Jan Chaloupka
42db31683f README: describe usage of the namespace filtering 2020-07-27 10:57:30 +02:00
Jan Chaloupka
c40a9c397f e2e: add test for included/excluded namespace through PodLifeTime strategy 2020-07-27 10:54:35 +02:00
Kubernetes Prow Robot
4014ebad92 Merge pull request #359 from KohlsTechnology/more-helm-docs
More Helm Documentation
2020-07-26 18:56:16 -07:00
Kubernetes Prow Robot
bb7cb05571 Merge pull request #360 from dharmab/descheduler-autoheal-guide
Add NPD+CA autohealing use case to user guide
2020-07-24 15:20:16 -07:00
Dharma Bellamkonda
30b2bd5d9f Add NPD+CA autohealing use case to user guide 2020-07-24 15:40:47 -06:00
Sean Malloy
8d5ab05aa0 Add Helm Badge To README 2020-07-23 22:14:22 -05:00
Sean Malloy
db501da34d Clean Up End User Helm Documentation
* Correct helm install command in documentation
* Add link to helm install docs from main README
2020-07-23 22:11:17 -05:00
Kubernetes Prow Robot
8d60370612 Merge pull request #358 from damemi/helm-chart-name-docs
Update helm chart name in docs
2020-07-23 16:38:22 -07:00
Mike Dame
052f011288 Update helm chart name in docs 2020-07-23 15:42:40 -04:00
Kubernetes Prow Robot
6e23579bd0 Merge pull request #356 from damemi/update-helm-action
Update Helm release action to work on release branches
2020-07-23 11:05:29 -07:00
Mike Dame
7331f4e5de Update Helm release action to work on release branches
Basing this action on push to `chart-*` tags doesn't work: the action itself
creates the new release tag, so trying to push the changes to a tag ends up
with the releaser comparing the changes to themself (and failing).

This also renames the chart from "descheduler" to "descheduler-helm-chart", to
avoid confusion with automated releases.
2020-07-22 17:12:54 -04:00
Jan Chaloupka
11f1333af7 ListPodsOnANode: allow to include/exclude namespaces
Info: field selector is still not properly mocked so it's not possible to unit test it
2020-07-21 15:08:05 +02:00
Jan Chaloupka
74f70fdbc9 ListPodsOnANode: define Options type to pass various options
Options like:
- filter
- included/excluded namespaces
- labels
2020-07-21 15:07:45 +02:00
Jan Chaloupka
0006fb039d ListPodsOnANode: have one function parameter per each line 2020-07-21 15:02:34 +02:00
4477 changed files with 732307 additions and 180548 deletions

View File

@@ -2,8 +2,8 @@ name: Release Charts
on:
push:
tags:
- chart-*
branches:
- release-*
jobs:
release:
@@ -11,20 +11,21 @@ jobs:
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Configure Git
run: |
git config user.name "$GITHUB_ACTOR"
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
- name: Fetch history
run: git fetch --prune --unshallow
- name: Add dependency chart repos
run: |
helm repo add stable https://kubernetes-charts.storage.googleapis.com/
- name: Install Helm
uses: azure/setup-helm@v1
with:
version: v3.4.0
- name: Run chart-releaser
uses: helm/chart-releaser-action@v1.0.0-rc.2
uses: helm/chart-releaser-action@v1.1.0
env:
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
CR_RELEASE_NAME_TEMPLATE: "descheduler-helm-chart-{{ .Version }}"

2
.gitignore vendored
View File

@@ -3,3 +3,5 @@ _tmp/
vendordiff.patch
.idea/
*.code-workspace
.vscode/
kind

View File

@@ -11,15 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM golang:1.14.4
FROM golang:1.17.3
WORKDIR /go/src/sigs.k8s.io/descheduler
COPY . .
RUN make
ARG ARCH
ARG VERSION
RUN VERSION=${VERSION} make build.$ARCH
FROM scratch
MAINTAINER Avesh Agarwal <avesh.ncsu@gmail.com>
MAINTAINER Kubernetes SIG Scheduling <kubernetes-sig-scheduling@googlegroups.com>
USER 1000
COPY --from=0 /go/src/sigs.k8s.io/descheduler/_output/bin/descheduler /bin/descheduler

View File

@@ -13,7 +13,9 @@
# limitations under the License.
FROM scratch
MAINTAINER Avesh Agarwal <avesh.ncsu@gmail.com>
MAINTAINER Kubernetes SIG Scheduling <kubernetes-sig-scheduling@googlegroups.com>
USER 1000
COPY _output/bin/descheduler /bin/descheduler

View File

@@ -14,16 +14,18 @@
.PHONY: test
# VERSION is currently based on the last commit
VERSION?=$(shell git describe --tags)
COMMIT=$(shell git rev-parse HEAD)
# VERSION is based on a date stamp plus the last commit
VERSION?=v$(shell date +%Y%m%d)-$(shell git describe --tags --match "v*")
BRANCH?=$(shell git branch --show-current)
SHA1?=$(shell git rev-parse HEAD)
BUILD=$(shell date +%FT%T%z)
LDFLAG_LOCATION=sigs.k8s.io/descheduler/cmd/descheduler/app
LDFLAG_LOCATION=sigs.k8s.io/descheduler/pkg/version
ARCHS = amd64 arm arm64
LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitCommit=${COMMIT}"
LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitbranch=${BRANCH} -X ${LDFLAG_LOCATION}.gitsha1=${SHA1}"
GOLANGCI_VERSION := v1.15.0
HAS_GOLANGCI := $(shell ls _output/bin/golangci-lint)
GOLANGCI_VERSION := v1.43.0
HAS_GOLANGCI := $(shell ls _output/bin/golangci-lint 2> /dev/null)
# REGISTRY is the container registry to push
# into. The default is to push to the staging
@@ -41,30 +43,65 @@ IMAGE_GCLOUD:=$(REGISTRY)/descheduler:$(VERSION)
# In the future binaries can be uploaded to
# GCS bucket gs://k8s-staging-descheduler.
HAS_HELM := $(shell which helm)
HAS_HELM := $(shell which helm 2> /dev/null)
all: build
build:
CGO_ENABLED=0 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.amd64:
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.arm:
CGO_ENABLED=0 GOOS=linux GOARCH=arm GOARM=7 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.arm64:
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
dev-image: build
docker build -f Dockerfile.dev -t $(IMAGE) .
image:
docker build -t $(IMAGE) .
docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="amd64" -t $(IMAGE) .
push-container-to-gcloud: image
image.amd64:
docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="amd64" -t $(IMAGE)-amd64 .
image.arm:
docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="arm" -t $(IMAGE)-arm .
image.arm64:
docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="arm64" -t $(IMAGE)-arm64 .
push: image
gcloud auth configure-docker
docker tag $(IMAGE) $(IMAGE_GCLOUD)
docker push $(IMAGE_GCLOUD)
push: push-container-to-gcloud
push-all: image.amd64 image.arm image.arm64
gcloud auth configure-docker
for arch in $(ARCHS); do \
docker tag $(IMAGE)-$${arch} $(IMAGE_GCLOUD)-$${arch} ;\
docker push $(IMAGE_GCLOUD)-$${arch} ;\
done
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create $(IMAGE_GCLOUD) $(addprefix --amend $(IMAGE_GCLOUD)-, $(ARCHS))
for arch in $(ARCHS); do \
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest annotate --arch $${arch} $(IMAGE_GCLOUD) $(IMAGE_GCLOUD)-$${arch} ;\
done
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push $(IMAGE_GCLOUD) ;\
clean:
rm -rf _output
rm -rf _tmp
verify: verify-gofmt verify-vendor lint lint-chart
verify: verify-govet verify-spelling verify-gofmt verify-vendor lint lint-chart verify-toc verify-gen
verify-govet:
./hack/verify-govet.sh
verify-spelling:
./hack/verify-spelling.sh
verify-gofmt:
./hack/verify-gofmt.sh
@@ -72,6 +109,9 @@ verify-gofmt:
verify-vendor:
./hack/verify-vendor.sh
verify-toc:
./hack/verify-toc.sh
test-unit:
./test/run-unit-tests.sh
@@ -82,15 +122,26 @@ gen:
./hack/update-generated-conversions.sh
./hack/update-generated-deep-copies.sh
./hack/update-generated-defaulters.sh
./hack/update-toc.sh
verify-gen:
./hack/verify-conversions.sh
./hack/verify-deep-copies.sh
./hack/verify-defaulters.sh
lint:
ifndef HAS_GOLANGCI
curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b ./_output/bin ${GOLANGCI_VERSION}
curl -sSfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s -- -b ./_output/bin ${GOLANGCI_VERSION}
endif
./_output/bin/golangci-lint run
lint-chart:
lint-chart: ensure-helm-install
helm lint ./charts/descheduler
test-helm: ensure-helm-install
./test/run-helm-tests.sh
ensure-helm-install:
ifndef HAS_HELM
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 && chmod 700 ./get_helm.sh && ./get_helm.sh
endif
helm lint ./charts/descheduler
endif

10
OWNERS
View File

@@ -1,8 +1,7 @@
approvers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla
- damemi
- ingvagabund
- seanmalloy
reviewers:
- aveshagarwal
- k82cn
@@ -10,3 +9,8 @@ reviewers:
- damemi
- seanmalloy
- ingvagabund
- lixiang233
emeritus_approvers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla

623
README.md
View File

@@ -1,9 +1,8 @@
[![Go Report Card](https://goreportcard.com/badge/kubernetes-sigs/descheduler)](https://goreportcard.com/report/sigs.k8s.io/descheduler)
[![Go Report Card](https://goreportcard.com/badge/sigs.k8s.io/descheduler)](https://goreportcard.com/report/sigs.k8s.io/descheduler)
![Release Charts](https://github.com/kubernetes-sigs/descheduler/workflows/Release%20Charts/badge.svg)
# Descheduler for Kubernetes
## Introduction
Scheduling in Kubernetes is the process of binding pending pods to nodes, and is performed by
a component of Kubernetes called kube-scheduler. The scheduler's decisions, whether or where a
pod can or can not be scheduled, are guided by its configurable policy which comprises of set of
@@ -23,9 +22,45 @@ Descheduler, based on its policy, finds pods that can be moved and evicts them.
note, in current implementation, descheduler does not schedule replacement of evicted pods
but relies on the default scheduler for that.
Table of Contents
=================
<!-- toc -->
- [Quick Start](#quick-start)
- [Run As A Job](#run-as-a-job)
- [Run As A CronJob](#run-as-a-cronjob)
- [Run As A Deployment](#run-as-a-deployment)
- [Install Using Helm](#install-using-helm)
- [Install Using Kustomize](#install-using-kustomize)
- [User Guide](#user-guide)
- [Policy and Strategies](#policy-and-strategies)
- [RemoveDuplicates](#removeduplicates)
- [LowNodeUtilization](#lownodeutilization)
- [HighNodeUtilization](#highnodeutilization)
- [RemovePodsViolatingInterPodAntiAffinity](#removepodsviolatinginterpodantiaffinity)
- [RemovePodsViolatingNodeAffinity](#removepodsviolatingnodeaffinity)
- [RemovePodsViolatingNodeTaints](#removepodsviolatingnodetaints)
- [RemovePodsViolatingTopologySpreadConstraint](#removepodsviolatingtopologyspreadconstraint)
- [RemovePodsHavingTooManyRestarts](#removepodshavingtoomanyrestarts)
- [PodLifeTime](#podlifetime)
- [RemoveFailedPods](#removefailedpods)
- [Filter Pods](#filter-pods)
- [Namespace filtering](#namespace-filtering)
- [Priority filtering](#priority-filtering)
- [Label filtering](#label-filtering)
- [Node Fit filtering](#node-fit-filtering)
- [Pod Evictions](#pod-evictions)
- [Pod Disruption Budget (PDB)](#pod-disruption-budget-pdb)
- [Metrics](#metrics)
- [Compatibility Matrix](#compatibility-matrix)
- [Getting Involved and Contributing](#getting-involved-and-contributing)
- [Communicating With Contributors](#communicating-with-contributors)
- [Roadmap](#roadmap)
- [Code of conduct](#code-of-conduct)
<!-- /toc -->
## Quick Start
The descheduler can be run as a Job or CronJob inside of a k8s cluster. It has the
The descheduler can be run as a `Job`, `CronJob`, or `Deployment` inside of a k8s cluster. It has the
advantage of being able to be run multiple times without needing user intervention.
The descheduler pod is run as a critical pod in the `kube-system` namespace to avoid
being evicted by itself or by the kubelet.
@@ -33,17 +68,52 @@ being evicted by itself or by the kubelet.
### Run As A Job
```
kubectl create -f kubernetes/rbac.yaml
kubectl create -f kubernetes/configmap.yaml
kubectl create -f kubernetes/job.yaml
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/job/job.yaml
```
### Run As A CronJob
```
kubectl create -f kubernetes/rbac.yaml
kubectl create -f kubernetes/configmap.yaml
kubectl create -f kubernetes/cronjob.yaml
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/cronjob/cronjob.yaml
```
### Run As A Deployment
```
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/deployment/deployment.yaml
```
### Install Using Helm
Starting with release v0.18.0 there is an official helm chart that can be used to install the
descheduler. See the [helm chart README](https://github.com/kubernetes-sigs/descheduler/blob/master/charts/descheduler/README.md) for detailed instructions.
The descheduler helm chart is also listed on the [artifact hub](https://artifacthub.io/packages/helm/descheduler/descheduler).
### Install Using Kustomize
You can use kustomize to install descheduler.
See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/kustomize/resource/) for detailed instructions.
Run As A Job
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.22.0' | kubectl apply -f -
```
Run As A CronJob
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.22.0' | kubectl apply -f -
```
Run As A Deployment
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.22.0' | kubectl apply -f -
```
## User Guide
@@ -52,25 +122,67 @@ See the [user guide](docs/user-guide.md) in the `/docs` directory.
## Policy and Strategies
Descheduler's policy is configurable and includes strategies that can be enabled or disabled.
Seven strategies `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`,
`RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints`, `RemovePodsHavingTooManyRestarts`, and `PodLifeTime`
are currently implemented. As part of the policy, the parameters associated with the strategies can be configured too.
By default, all strategies are enabled.
Descheduler's policy is configurable and includes strategies that can be enabled or disabled. By default, all strategies are enabled.
The policy includes a common configuration that applies to all the strategies:
| Name | Default Value | Description |
|------|---------------|-------------|
| `nodeSelector` | `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` | `false` | allows eviction of pods with local storage |
| `evictSystemCriticalPods` | `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
| `ignorePvcPods` | `false` | set whether PVC pods should be evicted or ignored |
| `maxNoOfPodsToEvictPerNode` | `nil` | maximum number of pods evicted from each node (summed through all strategies) |
| `evictFailedBarePods` | `false` | allow eviction of pods without owner references and in failed phase |
As part of the policy, the parameters associated with each strategy can be configured.
See each strategy for details on available parameters.
**Policy:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
nodeSelector: prod=dev
evictFailedBarePods: false
evictLocalStoragePods: true
evictSystemCriticalPods: true
maxNoOfPodsToEvictPerNode: 40
ignorePvcPods: false
strategies:
...
```
The following diagram provides a visualization of most of the strategies to help
categorize how strategies fit together.
![Strategies diagram](strategies_diagram.png)
### RemoveDuplicates
This strategy makes sure that there is only one pod associated with a Replica Set (RS),
Replication Controller (RC), Deployment, or Job running on the same node. If there are more,
This strategy makes sure that there is only one pod associated with a ReplicaSet (RS),
ReplicationController (RC), StatefulSet, or Job running on the same node. If there are more,
those duplicate pods are evicted for better spreading of pods in a cluster. This issue could happen
if some nodes went down due to whatever reasons, and pods on them were moved to other nodes leading to
more than one pod associated with a RS or RC, for example, running on the same node. Once the failed nodes
are ready again, this strategy could be enabled to evict those duplicate pods.
It provides one optional parameter, `ExcludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
It provides one optional parameter, `excludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction. Note that
pods created by Deployments are considered for eviction by this strategy. The `excludeOwnerKinds` parameter
should include `ReplicaSet` to have pods created by Deployments excluded.
```
**Parameters:**
|Name|Type|
|---|---|
|`excludeOwnerKinds`|list(string)|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -89,20 +201,37 @@ in the hope that recreation of evicted pods will be scheduled on these underutil
parameters of this strategy are configured under `nodeResourceUtilizationThresholds`.
The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, and number of pods in terms of percentage. If a node's
usage is below threshold for all (cpu, memory, and number of pods), the node is considered underutilized.
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage (the percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node).
If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
Currently, pods request resource requirements are considered for computing node resource utilization.
There is another configurable threshold, `targetThresholds`, that is used to compute those potential nodes
from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, or number of pods),
from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, number of pods, or extended resources),
the node is considered over utilized. Any node between the thresholds, `thresholds` and `targetThresholds` is
considered appropriately utilized and is not considered for eviction. The threshold, `targetThresholds`,
can be configured for cpu, memory, and number of pods too in terms of percentage.
These thresholds, `thresholds` and `targetThresholds`, could be tuned as per your cluster requirements.
Here is an example of a policy for this strategy:
These thresholds, `thresholds` and `targetThresholds`, could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `overutilized nodes` (those with usage above `targetThresholds`) to `underutilized nodes`
(those with usage below `thresholds`), it will abort if any number of `underutilized nodes` or `overutilized nodes` is zero.
```
**Parameters:**
|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`targetThresholds`|map(string:int)|
|`numberOfNodes`|int|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -121,34 +250,104 @@ strategies:
```
Policy should pass the following validation checks:
* Only three types of resources are supported: `cpu`, `memory` and `pods`.
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`.
If any of these resource types is not specified, all its thresholds default to 100% to avoid nodes going from underutilized to overutilized.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional,
and will not be used to compute node's usage if it's not specified in `thresholds` and `targetThresholds` explicitly.
* `thresholds` or `targetThresholds` can not be nil and they must configure exactly the same types of resources.
* The valid range of the resource's percentage value is \[0, 100\]
* Percentage value of `thresholds` can not be greater than `targetThresholds` for the same resource.
If any of the resource types is not specified, all its thresholds default to 100% to avoid nodes going
from underutilized to overutilized.
There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
are above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
### HighNodeUtilization
This strategy finds nodes that are under utilized and evicts pods from the nodes in the hope that these pods will be
scheduled compactly into fewer nodes. Used in conjunction with node auto-scaling, this strategy is intended to help
trigger down scaling of under utilized nodes.
This strategy **must** be used with the scheduler strategy `MostRequestedPriority`. The parameters of this strategy are
configured under `nodeResourceUtilizationThresholds`.
The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage. The percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node.
If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
Currently, pods request resource requirements are considered for computing node resource utilization.
Any node above `thresholds` is considered appropriately utilized and is not considered for eviction.
The `thresholds` param could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `underutilized nodes` (those with usage below `thresholds`)
so that they can be recreated in appropriately utilized nodes.
The strategy will abort if any number of `underutilized nodes` or `appropriately utilized nodes` is zero.
**Parameters:**
|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`numberOfNodes`|int|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"cpu" : 20
"memory": 20
"pods": 20
```
Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`. If any of these resource types is not specified, all its thresholds default to 100%.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional, and will not be used to compute node's usage if it's not specified in `thresholds` explicitly.
* `thresholds` can not be nil.
* The valid range of the resource's percentage value is \[0, 100\]
There is another parameter associated with the `HighNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
is above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
### RemovePodsViolatingInterPodAntiAffinity
This strategy makes sure that pods violating interpod anti-affinity are removed from nodes. For example,
if there is podA on a node and podB and podC (running on the same node) have anti-affinity rules which prohibit
them to run on the same node, then podA will be evicted from the node so that podB and podC could run. This
issue could happen, when the anti-affinity rules for podB and podC are created when they are already running on
node. Currently, there are no parameters associated with this strategy. To disable this strategy, the
policy should look like:
node.
```
**Parameters:**
|Name|Type|
|---|---|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingInterPodAntiAffinity":
enabled: false
enabled: true
```
### RemovePodsViolatingNodeAffinity
@@ -169,9 +368,20 @@ of scheduling. Over time nodeA stops to satisfy the rule. When the strategy gets
executed and there is another node available that satisfies the node affinity rule,
podA gets evicted from nodeA.
The policy file should look like:
**Parameters:**
```
|Name|Type|
|---|---|
|`nodeAffinityType`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -187,9 +397,21 @@ strategies:
This strategy makes sure that pods violating NoSchedule taints on nodes are removed. For example there is a
pod "podA" with a toleration to tolerate a taint ``key=value:NoSchedule`` scheduled and running on the tainted
node. If the node's taint is subsequently updated/removed, taint is no longer satisfied by its pods' tolerations
and will be evicted. The policy file should look like:
and will be evicted.
````
**Parameters:**
|Name|Type|
|---|---|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
````yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -197,11 +419,64 @@ strategies:
enabled: true
````
### RemovePodsViolatingTopologySpreadConstraint
This strategy makes sure that pods violating [topology spread constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/)
are evicted from nodes. Specifically, it tries to evict the minimum number of pods required to balance topology domains to within each constraint's `maxSkew`.
This strategy requires k8s version 1.18 at a minimum.
By default, this strategy only deals with hard constraints, setting parameter `includeSoftConstraints` to `true` will
include soft constraints.
Strategy parameter `labelSelector` is not utilized when balancing topology domains and is only applied during eviction to determine if the pod can be evicted.
**Parameters:**
|Name|Type|
|---|---|
|`includeSoftConstraints`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
includeSoftConstraints: false
```
### RemovePodsHavingTooManyRestarts
This strategy makes sure that pods having too many restarts are removed from nodes. For example a pod with EBS/PD that can't get the volume/disk attached to the instance, then the pod should be re-scheduled to other nodes.
This strategy makes sure that pods having too many restarts are removed from nodes. For example a pod with EBS/PD that
can't get the volume/disk attached to the instance, then the pod should be re-scheduled to other nodes. Its parameters
include `podRestartThreshold`, which is the number of restarts (summed over all eligible containers) at which a pod
should be evicted, and `includingInitContainers`, which determines whether init container restarts should be factored
into that calculation.
```
**Parameters:**
|Name|Type|
|---|---|
|`podRestartThreshold`|int|
|`includingInitContainers`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -215,33 +490,269 @@ strategies:
### PodLifeTime
This strategy evicts pods that are older than `.strategies.PodLifeTime.params.maxPodLifeTimeSeconds` The policy
file should look like:
This strategy evicts pods that are older than `maxPodLifeTimeSeconds`.
````
You can also specify `podStatusPhases` to `only` evict pods with specific `StatusPhases`, currently this parameter is limited
to `Running` and `Pending`.
**Parameters:**
|Name|Type|
|---|---|
|`maxPodLifeTimeSeconds`|int|
|`podStatusPhases`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
podStatusPhases:
- "Pending"
```
### RemoveFailedPods
This strategy evicts pods that are in failed status phase.
You can provide an optional parameter to filter by failed `reasons`.
`reasons` can be expanded to include reasons of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
You can specify an optional parameter `minPodLifetimeSeconds` to evict pods that are older than specified seconds.
Lastly, you can specify the optional parameter `excludeOwnerKinds` and if a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
**Parameters:**
|Name|Type|
|---|---|
|`minPodLifetimeSeconds`|uint|
|`excludeOwnerKinds`|list(string)|
|`reasons`|list(string)|
|`includingInitContainers`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemoveFailedPods":
enabled: true
params:
failedPods:
reasons:
- "NodeAffinity"
includingInitContainers: true
excludeOwnerKinds:
- "Job"
minPodLifetimeSeconds: 3600
```
## Filter Pods
### Namespace filtering
The following strategies accept a `namespaces` parameter which allows to specify a list of including, resp. excluding namespaces:
* `PodLifeTime`
* `RemovePodsHavingTooManyRestarts`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemoveDuplicates`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemoveFailedPods`
For example:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
namespaces:
include:
- "namespace1"
- "namespace2"
```
In the examples `PodLifeTime` gets executed only over `namespace1` and `namespace2`.
The similar holds for `exclude` field:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
namespaces:
exclude:
- "namespace1"
- "namespace2"
```
The strategy gets executed over all namespaces but `namespace1` and `namespace2`.
It's not allowed to compute `include` with `exclude` field.
### Priority filtering
All strategies are able to configure a priority threshold, only pods under the threshold can be evicted. You can
specify this threshold by setting `thresholdPriorityClassName`(setting the threshold to the value of the given
priority class) or `thresholdPriority`(directly setting the threshold) parameters. By default, this threshold
is set to the value of `system-cluster-critical` priority class.
Note: Setting `evictSystemCriticalPods` to true disables priority filtering entirely.
E.g.
Setting `thresholdPriority`
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
thresholdPriority: 10000
```
Setting `thresholdPriorityClassName`
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
thresholdPriorityClassName: "priorityclass1"
```
Note that you can't configure both `thresholdPriority` and `thresholdPriorityClassName`, if the given priority class
does not exist, descheduler won't create it and will throw an error.
### Label filtering
The following strategies can configure a [standard kubernetes labelSelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#labelselector-v1-meta)
to filter pods by their labels:
* `PodLifeTime`
* `RemovePodsHavingTooManyRestarts`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemoveFailedPods`
This allows running strategies among pods the descheduler is interested in.
For example:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
````
labelSelector:
matchLabels:
component: redis
matchExpressions:
- {key: tier, operator: In, values: [cache]}
- {key: environment, operator: NotIn, values: [dev]}
```
### Node Fit filtering
The following strategies accept a `nodeFit` boolean parameter which can optimize descheduling:
* `RemoveDuplicates`
* `LowNodeUtilization`
* `HighNodeUtilization`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemovePodsHavingTooManyRestarts`
* `RemoveFailedPods`
If set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. If a pod cannot be rescheduled to another node, it will not be evicted. Currently the following criteria are considered when setting `nodeFit` to `true`:
- A `nodeSelector` on the pod
- Any `Tolerations` on the pod and any `Taints` on the other nodes
- `nodeAffinity` on the pod
- Whether any of the other nodes are marked as `unschedulable`
E.g.
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"cpu" : 20
"memory": 20
"pods": 20
targetThresholds:
"cpu" : 50
"memory": 50
"pods": 50
nodeFit: true
```
Note that node fit filtering references the current pod spec, and not that of it's owner.
Thus, if the pod is owned by a ReplicationController (and that ReplicationController was modified recently),
the pod may be running with an outdated spec, which the descheduler will reference when determining node fit.
This is expected behavior as the descheduler is a "best-effort" mechanism.
Using Deployments instead of ReplicationControllers provides an automated rollout of pod spec changes, therefore ensuring that the descheduler has an up-to-date view of the cluster state.
## Pod Evictions
When the descheduler decides to evict pods from a node, it employs the following general mechanism:
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted.
* Pods (static or mirrored pods or stand alone pods) not part of an RC, RS, Deployment or Job are
never evicted because these pods won't be recreated.
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted (unless `evictSystemCriticalPods: true` is set).
* Pods (static or mirrored pods or standalone pods) not part of an ReplicationController, ReplicaSet(Deployment), StatefulSet, or Job are
never evicted because these pods won't be recreated. (Standalone pods in failed status phase can be evicted by setting `evictFailedBarePods: true`)
* Pods associated with DaemonSets are never evicted.
* Pods with local storage are never evicted.
* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set).
* Pods with PVCs are evicted (unless `ignorePvcPods: true` is set).
* In `LowNodeUtilization` and `RemovePodsViolatingInterPodAntiAffinity`, pods are evicted by their priority from low to high, and if they have same priority,
best effort pods are evicted before burstable and guaranteed pods.
* All types of pods with the annotation descheduler.alpha.kubernetes.io/evict are evicted. This
* All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This
annotation is used to override checks which prevent eviction and users can select which pod is evicted.
Users should know how and if the pod will be recreated.
* Pods with a non-nil DeletionTimestamp are not evicted by default.
Setting `--v=4` or greater on the Descheduler will log all reasons why any pod is not evictable.
@@ -250,6 +761,16 @@ Setting `--v=4` or greater on the Descheduler will log all reasons why any pod i
Pods subject to a Pod Disruption Budget(PDB) are not evicted if descheduling violates its PDB. The pods
are evicted by using the eviction subresource to handle PDB.
## Metrics
| name | type | description |
|-------|-------|----------------|
| build_info | gauge | constant 1 |
| pods_evicted | CounterVec | total number of pods evicted |
The metrics are served through https://localhost:10258/metrics by default.
The address and port can be changed by setting `--binding-address` and `--secure-port` flags.
## Compatibility Matrix
The below compatibility matrix shows the k8s client package(client-go, apimachinery, etc) versions that descheduler
is compiled with. At this time descheduler does not have a hard dependency to a specific k8s release. However a
@@ -261,6 +782,10 @@ packages that it is compiled with.
Descheduler | Supported Kubernetes Version
-------------|-----------------------------
v0.22 | v1.22
v0.21 | v1.21
v0.20 | v1.20
v0.19 | v1.19
v0.18 | v1.18
v0.10 | v1.17
v0.4-v0.9 | v1.9+

View File

@@ -1,7 +1,7 @@
apiVersion: v1
name: descheduler
version: 0.18.0
appVersion: 0.18.0
version: 0.23.1
appVersion: 0.23.0
description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that.
keywords:
- kubernetes
@@ -12,5 +12,5 @@ icon: https://kubernetes.io/images/favicon.png
sources:
- https://github.com/kubernetes-sigs/descheduler
maintainers:
- name: stevehipwell
email: steve.hipwell@github.com
- name: Kubernetes SIG Scheduling
email: kubernetes-sig-scheduling@googlegroups.com

View File

@@ -6,12 +6,12 @@
```shell
helm repo add descheduler https://kubernetes-sigs.github.io/descheduler/
$ helm install descheduler/descheduler --name my-release
helm install my-release --namespace kube-system descheduler/descheduler
```
## Introduction
This chart bootstraps a [desheduler](https://github.com/kubernetes-sigs/descheduler/) cron job on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
This chart bootstraps a [descheduler](https://github.com/kubernetes-sigs/descheduler/) cron job on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
## Prerequisites
@@ -22,7 +22,7 @@ This chart bootstraps a [desheduler](https://github.com/kubernetes-sigs/deschedu
To install the chart with the release name `my-release`:
```shell
helm install --name my-release descheduler/descheduler
helm install --namespace kube-system my-release descheduler/descheduler
```
The command deploys _descheduler_ on the Kubernetes cluster in the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation.
@@ -43,17 +43,30 @@ The command removes all the Kubernetes components associated with the chart and
The following table lists the configurable parameters of the _descheduler_ chart and their default values.
| Parameter | Description | Default |
| ------------------------------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------------------------ |
| `image.repository` | Docker repository to use | `us.gcr.io/k8s-artifacts-prod/descheduler/descheduler` |
| `image.tag` | Docker tag to use | `v[chart appVersion]` |
| `image.pullPolicy` | Docker image pull policy | `IfNotPresent` |
| `nameOverride` | String to partially override `descheduler.fullname` template (will prepend the release name) | `""` |
| `fullnameOverride` | String to fully override `descheduler.fullname` template | `""` |
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
| `rbac.create` | If `true`, create & use RBAC resources | `true` |
| `serviceAccount.create` | If `true`, create a service account for the cron job | `true` |
| `serviceAccount.name` | The name of the service account to use, if not set and create is true a name is generated using the fullname template | `nil` |
| Parameter | Description | Default |
| ------------------------------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------ |
| `kind` | Use as CronJob or Deployment | `CronJob` |
| `image.repository` | Docker repository to use | `k8s.gcr.io/descheduler/descheduler` |
| `image.tag` | Docker tag to use | `v[chart appVersion]` |
| `image.pullPolicy` | Docker image pull policy | `IfNotPresent` |
| `imagePullSecrets` | Docker repository secrets | `[]` |
| `nameOverride` | String to partially override `descheduler.fullname` template (will prepend the release name) | `""` |
| `fullnameOverride` | String to fully override `descheduler.fullname` template | `""` |
| `cronJobApiVersion` | CronJob API Group Version | `"batch/v1"` |
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
| `startingDeadlineSeconds` | If set, configure `startingDeadlineSeconds` for the _descheduler_ job | `nil` |
| `successfulJobsHistoryLimit` | If set, configure `successfulJobsHistoryLimit` for the _descheduler_ job | `nil` |
| `failedJobsHistoryLimit` | If set, configure `failedJobsHistoryLimit` for the _descheduler_ job | `nil` |
| `deschedulingInterval` | If using kind:Deployment, sets time between consecutive descheduler executions. | `5m` |
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
| `rbac.create` | If `true`, create & use RBAC resources | `true` |
| `podSecurityPolicy.create` | If `true`, create PodSecurityPolicy | `true` |
| `resources` | Descheduler container CPU and memory requests/limits | _see values.yaml_ |
| `serviceAccount.create` | If `true`, create a service account for the cron job | `true` |
| `serviceAccount.name` | The name of the service account to use, if not set and create is true a name is generated using the fullname template | `nil` |
| `nodeSelector` | Node selectors to run the descheduler cronjob on specific nodes | `nil` |
| `tolerations` | tolerations to run the descheduler cronjob on specific nodes | `nil` |
| `suspend` | Set spec.suspend in descheduler cronjob | `false` |
| `commonLabels` | Labels to apply to all resources | `{}` |

View File

@@ -1 +1 @@
Descheduler installed as a cron job.
Descheduler installed as a {{ .Values.kind }} .

View File

@@ -42,6 +42,17 @@ app.kubernetes.io/instance: {{ .Release.Name }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- if .Values.commonLabels}}
{{ toYaml .Values.commonLabels }}
{{- end }}
{{- end -}}
{{/*
Selector labels
*/}}
{{- define "descheduler.selectorLabels" -}}
app.kubernetes.io/name: {{ include "descheduler.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
{{/*

View File

@@ -12,10 +12,23 @@ rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
{{- if .Values.podSecurityPolicy.create }}
- apiGroups: ['policy']
resources: ['podsecuritypolicies']
verbs: ['use']
resourceNames:
- {{ template "descheduler.fullname" . }}
{{- end }}
{{- end -}}

View File

@@ -1,4 +1,5 @@
apiVersion: batch/v1beta1
{{- if eq .Values.kind "CronJob" }}
apiVersion: {{ .Values.cronJobApiVersion | default "batch/v1" }}
kind: CronJob
metadata:
name: {{ template "descheduler.fullname" . }}
@@ -6,7 +7,19 @@ metadata:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
schedule: {{ .Values.schedule | quote }}
{{- if .Values.suspend }}
suspend: {{ .Values.suspend }}
{{- end }}
concurrencyPolicy: "Forbid"
{{- if .Values.startingDeadlineSeconds }}
startingDeadlineSeconds: {{ .Values.startingDeadlineSeconds }}
{{- end }}
{{- if .Values.successfulJobsHistoryLimit }}
successfulJobsHistoryLimit: {{ .Values.successfulJobsHistoryLimit }}
{{- end }}
{{- if .Values.failedJobsHistoryLimit }}
failedJobsHistoryLimit: {{ .Values.failedJobsHistoryLimit }}
{{- end }}
jobTemplate:
spec:
template:
@@ -18,17 +31,32 @@ spec:
{{- .Values.podAnnotations | toYaml | nindent 12 }}
{{- end }}
labels:
app.kubernetes.io/name: {{ include "descheduler.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- include "descheduler.selectorLabels" . | nindent 12 }}
{{- if .Values.podLabels }}
{{- .Values.podLabels | toYaml | nindent 12 }}
{{- end }}
spec:
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
serviceAccountName: {{ template "descheduler.serviceAccountName" . }}
restartPolicy: "Never"
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 10 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
@@ -44,6 +72,18 @@ spec:
- {{ $value | quote }}
{{- end }}
{{- end }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 16 }}
resources:
{{- toYaml .Values.resources | nindent 16 }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
@@ -51,3 +91,4 @@ spec:
- name: policy-volume
configMap:
name: {{ template "descheduler.fullname" . }}
{{- end }}

View File

@@ -0,0 +1,81 @@
{{- if eq .Values.kind "Deployment" }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "descheduler.fullname" . }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
replicas: 1
selector:
matchLabels:
{{- include "descheduler.selectorLabels" . | nindent 6 }}
template:
metadata:
labels:
{{- include "descheduler.selectorLabels" . | nindent 8 }}
{{- if .Values.podLabels }}
{{- .Values.podLabels | toYaml | nindent 8 }}
{{- end }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
{{- if .Values.podAnnotations }}
{{- .Values.podAnnotations | toYaml | nindent 8 }}
{{- end }}
spec:
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
serviceAccountName: {{ template "descheduler.serviceAccountName" . }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--descheduling-interval"
- {{ required "deschedulingInterval required for running as Deployment" .Values.deschedulingInterval }}
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key | quote }}
{{- if $value }}
- {{ $value | quote }}
{{- end }}
{{- end }}
ports:
- containerPort: 10258
protocol: TCP
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 12 }}
resources:
{{- toYaml .Values.resources | nindent 12 }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
volumes:
- name: policy-volume
configMap:
name: {{ template "descheduler.fullname" . }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.affinity }}
affinity:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,38 @@
{{- if .Values.podSecurityPolicy.create -}}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: {{ template "descheduler.fullname" . }}
annotations:
seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'docker/default,runtime/default'
seccomp.security.alpha.kubernetes.io/defaultProfileName: 'runtime/default'
spec:
privileged: false
allowPrivilegeEscalation: false
requiredDropCapabilities:
- ALL
volumes:
- 'configMap'
- 'secret'
hostNetwork: false
hostIPC: false
hostPID: false
runAsUser:
rule: 'MustRunAs'
ranges:
- min: 1
max: 65535
seLinux:
rule: 'RunAsAny'
supplementalGroups:
rule: 'MustRunAs'
ranges:
- min: 1
max: 65535
fsGroup:
rule: 'MustRunAs'
ranges:
- min: 1
max: 65535
readOnlyRootFilesystem: true
{{- end -}}

View File

@@ -5,4 +5,7 @@ metadata:
name: {{ template "descheduler.serviceAccountName" . }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
{{- if .Values.serviceAccount.annotations }}
annotations: {{ toYaml .Values.serviceAccount.annotations | nindent 4 }}
{{- end }}
{{- end -}}

View File

@@ -0,0 +1,29 @@
apiVersion: v1
kind: Pod
metadata:
name: descheduler-test-pod
annotations:
"helm.sh/hook": test
spec:
restartPolicy: Never
serviceAccountName: descheduler-ci
containers:
- name: descheduler-test-container
image: alpine:latest
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- All
privileged: false
runAsNonRoot: false
command: ["/bin/ash"]
args:
- -c
- >-
apk --no-cache add curl &&
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl &&
chmod +x ./kubectl &&
mv ./kubectl /usr/local/bin/kubectl &&
/usr/local/bin/kubectl get pods --namespace kube-system --token "$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" | grep "descheduler" | grep "Completed"

View File

@@ -2,16 +2,40 @@
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# CronJob or Deployment
kind: CronJob
image:
repository: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler
repository: k8s.gcr.io/descheduler/descheduler
# Overrides the image tag whose default is the chart version
tag: ""
pullPolicy: IfNotPresent
imagePullSecrets: []
resources:
requests:
cpu: 500m
memory: 256Mi
# limits:
# cpu: 100m
# memory: 128Mi
nameOverride: ""
fullnameOverride: ""
# labels that'll be applied to all resources
commonLabels: {}
cronJobApiVersion: "batch/v1" # Use "batch/v1beta1" for k8s version < 1.21.0. TODO(@7i) remove with 1.23 release
schedule: "*/2 * * * *"
suspend: false
#startingDeadlineSeconds: 200
#successfulJobsHistoryLimit: 1
#failedJobsHistoryLimit: 1
# Required when running as a Deployment
deschedulingInterval: 5m
cmdOptions:
v: 3
@@ -28,8 +52,8 @@ deschedulerPolicy:
RemovePodsViolatingNodeAffinity:
enabled: true
params:
nodeAffinityType:
- requiredDuringSchedulingIgnoredDuringExecution
nodeAffinityType:
- requiredDuringSchedulingIgnoredDuringExecution
RemovePodsViolatingInterPodAntiAffinity:
enabled: true
LowNodeUtilization:
@@ -47,13 +71,49 @@ deschedulerPolicy:
priorityClassName: system-cluster-critical
nodeSelector: {}
# foo: bar
affinity: {}
# nodeAffinity:
# requiredDuringSchedulingIgnoredDuringExecution:
# nodeSelectorTerms:
# - matchExpressions:
# - key: kubernetes.io/e2e-az-name
# operator: In
# values:
# - e2e-az1
# - e2e-az2
tolerations: []
# - key: 'management'
# operator: 'Equal'
# value: 'tool'
# effect: 'NoSchedule'
rbac:
# Specifies whether RBAC resources should be created
create: true
podSecurityPolicy:
# Specifies whether PodSecurityPolicy should be created.
create: true
serviceAccount:
# Specifies whether a ServiceAccount should be created
create: true
# The name of the ServiceAccount to use.
# If not set and create is true, a name is generated using the fullname template
name:
# Specifies custom annotations for the serviceAccount
annotations: {}
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 3
periodSeconds: 10

View File

@@ -7,14 +7,14 @@ timeout: 1200s
options:
substitution_option: ALLOW_LOOSE
steps:
- name: 'gcr.io/k8s-testimages/gcb-docker-gcloud:v20190906-745fed4'
- name: 'gcr.io/k8s-staging-test-infra/gcb-docker-gcloud:v20211118-2f2d816b90'
entrypoint: make
env:
- DOCKER_CLI_EXPERIMENTAL=enabled
- VERSION=$_GIT_TAG
- BASE_REF=$_PULL_BASE_REF
args:
- push
- push-all
substitutions:
# _GIT_TAG will be filled with a git-based tag for the image, of the form vYYYYMMDD-hash, and
# can be used as a substitution

View File

@@ -18,44 +18,63 @@ limitations under the License.
package options
import (
"github.com/spf13/pflag"
apiserveroptions "k8s.io/apiserver/pkg/server/options"
clientset "k8s.io/client-go/kubernetes"
// install the componentconfig api so we get its defaulting and conversion functions
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
deschedulerscheme "sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
"github.com/spf13/pflag"
const (
DefaultDeschedulerPort = 10258
)
// DeschedulerServer configuration
type DeschedulerServer struct {
componentconfig.DeschedulerConfiguration
Client clientset.Interface
Client clientset.Interface
SecureServing *apiserveroptions.SecureServingOptionsWithLoopback
DisableMetrics bool
}
// NewDeschedulerServer creates a new DeschedulerServer with default parameters
func NewDeschedulerServer() *DeschedulerServer {
versioned := v1alpha1.DeschedulerConfiguration{}
deschedulerscheme.Scheme.Default(&versioned)
cfg := componentconfig.DeschedulerConfiguration{}
deschedulerscheme.Scheme.Convert(versioned, &cfg, nil)
s := DeschedulerServer{
DeschedulerConfiguration: cfg,
func NewDeschedulerServer() (*DeschedulerServer, error) {
cfg, err := newDefaultComponentConfig()
if err != nil {
return nil, err
}
return &s
secureServing := apiserveroptions.NewSecureServingOptions().WithLoopback()
secureServing.BindPort = DefaultDeschedulerPort
return &DeschedulerServer{
DeschedulerConfiguration: *cfg,
SecureServing: secureServing,
}, nil
}
func newDefaultComponentConfig() (*componentconfig.DeschedulerConfiguration, error) {
versionedCfg := v1alpha1.DeschedulerConfiguration{}
deschedulerscheme.Scheme.Default(&versionedCfg)
cfg := componentconfig.DeschedulerConfiguration{}
if err := deschedulerscheme.Scheme.Convert(&versionedCfg, &cfg, nil); err != nil {
return nil, err
}
return &cfg, nil
}
// AddFlags adds flags for a specific SchedulerServer to the specified FlagSet
func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&rs.Logging.Format, "logging-format", "text", `Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log-dir, --log-file, --log-file-max-size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning.`)
fs.DurationVar(&rs.DeschedulingInterval, "descheduling-interval", rs.DeschedulingInterval, "Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.")
fs.StringVar(&rs.KubeconfigFile, "kubeconfig", rs.KubeconfigFile, "File with kube configuration.")
fs.StringVar(&rs.PolicyConfigFile, "policy-config-file", rs.PolicyConfigFile, "File with descheduler policy configuration.")
fs.BoolVar(&rs.DryRun, "dry-run", rs.DryRun, "execute descheduler in dry run mode.")
// node-selector query causes descheduler to run only on nodes that matches the node labels in the query
fs.StringVar(&rs.NodeSelector, "node-selector", rs.NodeSelector, "Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)")
// max-no-pods-to-evict limits the maximum number of pods to be evicted per node by descheduler.
fs.IntVar(&rs.MaxNoOfPodsToEvictPerNode, "max-pods-to-evict-per-node", rs.MaxNoOfPodsToEvictPerNode, "Limits the maximum number of pods to be evicted per node by descheduler")
// evict-local-storage-pods allows eviction of pods that are using local storage. This is false by default.
fs.BoolVar(&rs.EvictLocalStoragePods, "evict-local-storage-pods", rs.EvictLocalStoragePods, "Enables evicting pods using local storage by descheduler")
fs.BoolVar(&rs.DisableMetrics, "disable-metrics", rs.DisableMetrics, "Disables metrics. The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.")
rs.SecureServing.AddFlags(fs)
}

View File

@@ -18,44 +18,86 @@ limitations under the License.
package app
import (
"flag"
"context"
"io"
"os/signal"
"syscall"
"k8s.io/apiserver/pkg/server/healthz"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/pkg/descheduler"
"github.com/spf13/cobra"
aflag "k8s.io/component-base/cli/flag"
"k8s.io/component-base/logs"
apiserver "k8s.io/apiserver/pkg/server"
"k8s.io/apiserver/pkg/server/mux"
restclient "k8s.io/client-go/rest"
"k8s.io/component-base/config"
_ "k8s.io/component-base/logs/json/register"
"k8s.io/component-base/logs/registry"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/klog/v2"
)
// NewDeschedulerCommand creates a *cobra.Command object with default parameters
func NewDeschedulerCommand(out io.Writer) *cobra.Command {
s := options.NewDeschedulerServer()
s, err := options.NewDeschedulerServer()
if err != nil {
klog.ErrorS(err, "unable to initialize server")
}
cmd := &cobra.Command{
Use: "descheduler",
Short: "descheduler",
Long: `The descheduler evicts pods which may be bound to less desired nodes`,
Run: func(cmd *cobra.Command, args []string) {
logs.InitLogs()
defer logs.FlushLogs()
err := Run(s)
// s.Logs.Config.Format = s.Logging.Format
// LoopbackClientConfig is a config for a privileged loopback connection
var LoopbackClientConfig *restclient.Config
var SecureServing *apiserver.SecureServingInfo
if err := s.SecureServing.ApplyTo(&SecureServing, &LoopbackClientConfig); err != nil {
klog.ErrorS(err, "failed to apply secure server configuration")
return
}
factory, _ := registry.LogRegistry.Get(s.Logging.Format)
if factory == nil {
klog.ClearLogger()
} else {
log, logrFlush := factory.Create(config.FormatOptions{})
defer logrFlush()
klog.SetLogger(log)
}
ctx, done := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
defer done()
pathRecorderMux := mux.NewPathRecorderMux("descheduler")
if !s.DisableMetrics {
pathRecorderMux.Handle("/metrics", legacyregistry.HandlerWithReset())
}
healthz.InstallHandler(pathRecorderMux, healthz.NamedCheck("Descheduler", healthz.PingHealthz.Check))
if _, err := SecureServing.Serve(pathRecorderMux, 0, ctx.Done()); err != nil {
klog.Fatalf("failed to start secure server: %v", err)
return
}
err := Run(ctx, s)
if err != nil {
klog.Errorf("%v", err)
klog.ErrorS(err, "descheduler server")
}
},
}
cmd.SetOutput(out)
cmd.SetOut(out)
flags := cmd.Flags()
flags.SetNormalizeFunc(aflag.WordSepNormalizeFunc)
flags.AddGoFlagSet(flag.CommandLine)
s.AddFlags(flags)
return cmd
}
func Run(rs *options.DeschedulerServer) error {
return descheduler.Run(rs)
func Run(ctx context.Context, rs *options.DeschedulerServer) error {
return descheduler.Run(ctx, rs)
}

View File

@@ -18,70 +18,19 @@ package app
import (
"fmt"
"runtime"
"strings"
"github.com/spf13/cobra"
"sigs.k8s.io/descheduler/pkg/version"
)
var (
// gitCommit is a constant representing the source version that
// generated this build. It should be set during build via -ldflags.
gitCommit string
// version is a constant representing the version tag that
// generated this build. It should be set during build via -ldflags.
version string
// buildDate in ISO8601 format, output of $(date -u +'%Y-%m-%dT%H:%M:%SZ')
//It should be set during build via -ldflags.
buildDate string
)
// Info holds the information related to descheduler app version.
type Info struct {
Major string `json:"major"`
Minor string `json:"minor"`
GitCommit string `json:"gitCommit"`
GitVersion string `json:"gitVersion"`
BuildDate string `json:"buildDate"`
GoVersion string `json:"goVersion"`
Compiler string `json:"compiler"`
Platform string `json:"platform"`
}
// Get returns the overall codebase version. It's for detecting
// what code a binary was built from.
func Get() Info {
majorVersion, minorVersion := splitVersion(version)
return Info{
Major: majorVersion,
Minor: minorVersion,
GitCommit: gitCommit,
GitVersion: version,
BuildDate: buildDate,
GoVersion: runtime.Version(),
Compiler: runtime.Compiler,
Platform: fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH),
}
}
func NewVersionCommand() *cobra.Command {
var versionCmd = &cobra.Command{
Use: "version",
Short: "Version of descheduler",
Long: `Prints the version of descheduler.`,
Run: func(cmd *cobra.Command, args []string) {
fmt.Printf("Descheduler version %+v\n", Get())
fmt.Printf("Descheduler version %+v\n", version.Get())
},
}
return versionCmd
}
// splitVersion splits the git version to generate major and minor versions needed.
func splitVersion(version string) (string, string) {
if version == "" {
return "", ""
}
// A sample version would be of form v0.1.0-7-ge884046, so split at first '.' and
// then return 0 and 1+(+ appended to follow semver convention) for major and minor versions.
return strings.Trim(strings.Split(version, ".")[0], "v"), strings.Split(version, ".")[1] + "+"
}

View File

@@ -17,20 +17,23 @@ limitations under the License.
package main
import (
"flag"
"fmt"
"os"
"k8s.io/component-base/cli"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/cmd/descheduler/app"
)
func init() {
klog.SetOutput(os.Stdout)
klog.InitFlags(nil)
}
func main() {
out := os.Stdout
cmd := app.NewDeschedulerCommand(out)
cmd.AddCommand(app.NewVersionCommand())
flag.CommandLine.Parse([]string{})
if err := cmd.Execute(); err != nil {
fmt.Println(err)
os.Exit(1)
}
code := cli.Run(cmd)
os.Exit(code)
}

View File

@@ -3,10 +3,10 @@
## Required Tools
- [Git](https://git-scm.com/downloads)
- [Go 1.14+](https://golang.org/dl/)
- [Go 1.16+](https://golang.org/dl/)
- [Docker](https://docs.docker.com/install/)
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl)
- [kind](https://kind.sigs.k8s.io/)
- [kind v0.10.0+](https://kind.sigs.k8s.io/)
## Build and Run
@@ -34,9 +34,23 @@ GOOS=linux make dev-image
kind create cluster --config hack/kind_config.yaml
kind load docker-image <image name>
kind get kubeconfig > /tmp/admin.conf
export KUBECONFIG=/tmp/admin.conf
make test-unit
make test-e2e
```
## Run Helm Tests
Run the helm test for a particular descheduler release by setting below variables,
```
HELM_IMAGE_REPO="descheduler"
HELM_IMAGE_TAG="helm-test"
HELM_CHART_LOCATION="./charts/descheduler"
```
The helm tests runs as part of descheduler CI. But, to run it manually from the descheduler root,
```
make test-helm
```
### Miscellaneous
See the [hack directory](https://github.com/kubernetes-sigs/descheduler/tree/master/hack) for additional tools and scripts used for developing the descheduler.
See the [hack directory](https://github.com/kubernetes-sigs/descheduler/tree/master/hack) for additional tools and scripts used for developing the descheduler.

View File

@@ -7,9 +7,9 @@
1. Make sure your repo is clean by git's standards
2. Create a release branch `git checkout -b release-1.18` (not required for patch releases)
3. Push the release branch to the descheuler repo and ensure branch protection is enabled (not required for patch releases)
4. Tag the repository and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
4. Tag the repository from the `master` branch (from the `release-1.18` branch for a patch release) and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
5. Publish a draft release using the tag you just created
6. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/master/k8s.gcr.io#image-promoter)
6. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/main/k8s.gcr.io#image-promoter)
7. Publish release
8. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
@@ -18,15 +18,20 @@
1. Make sure your repo is clean by git's standards
2. Create a release branch `git checkout -b release-1.18` (not required for patch releases)
3. Push the release branch to the descheuler repo and ensure branch protection is enabled (not required for patch releases)
4. Tag the repository and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
4. Tag the repository from the `master` branch (from the `release-1.18` branch for a patch release) and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
5. Checkout the tag you just created and make sure your repo is clean by git's standards `git checkout $VERSION`
6. Build and push the container image to the staging registry `VERSION=$VERSION make push`
6. Build and push the container image to the staging registry `VERSION=$VERSION make push-all`
7. Publish a draft release using the tag you just created
8. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/master/k8s.gcr.io#image-promoter)
8. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/main/k8s.gcr.io#image-promoter)
9. Publish release
10. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
### Notes
It's important to create the tag on the master branch after creating the release-* branch so that the [Helm releaser-action](#helm-chart) can work.
It compares the changes in the action-triggering branch to the latest tag on that branch, so if you tag before creating the new branch there
will be nothing to compare and it will fail (creating a new release branch usually involves no code changes). For this same reason, you should
also tag patch releases (on the release-* branch) *after* pushing changes (if those changes involve bumping the Helm chart version).
See [post-descheduler-push-images dashboard](https://testgrid.k8s.io/sig-scheduling#post-descheduler-push-images) for staging registry image build job status.
View the descheduler staging registry using [this URL](https://console.cloud.google.com/gcr/images/k8s-staging-descheduler/GLOBAL/descheduler) in a web browser
@@ -53,17 +58,17 @@ docker pull gcr.io/k8s-staging-descheduler/descheduler:v20200206-0.9.0-94-ge2a23
```
## Helm Chart
Helm chart releases are managed by a separate set of git tags that are prefixed with `chart-*`. Example git tag name is `chart-0.18.0`. Released versions of the
helm charts are stored in the `gh-pages` branch of this repo. The [chart-releaser-action GitHub Action](https://github.com/helm/chart-releaser-action) is setup to
build and push the helm charts to the `gh-pages` branch when a `chart-*` git tag is created.
Helm chart releases are managed by a separate set of git tags that are prefixed with `descheduler-helm-chart-*`. Example git tag name is `descheduler-helm-chart-0.18.0`.
Released versions of the helm charts are stored in the `gh-pages` branch of this repo. The [chart-releaser-action GitHub Action](https://github.com/helm/chart-releaser-action)
is setup to build and push the helm charts to the `gh-pages` branch when changes are pushed to a `release-*` branch.
The major and minor version of the chart matches the descheduler major and minor versions. For example descheduler helm chart version chart-0.18.0 corresponds
The major and minor version of the chart matches the descheduler major and minor versions. For example descheduler helm chart version helm-descheduler-chart-0.18.0 corresponds
to descheduler version v0.18.0. The patch version of the descheduler helm chart and the patcher version of the descheduler will not necessarily match. The patch
version of the descheduler helm chart is used to version changes specific to the helm chart.
1. Merge all helm chart changes into the appropriate release branch(i.e. release-1.18)
1. Merge all helm chart changes into the master branch before the release is tagged/cut
1. Ensure that `appVersion` in file `charts/descheduler/Chart.yaml` matches the descheduler version(no `v` prefix)
2. Ensure that `version` in file `charts/descheduler/Chart.yaml` has been incremented. This is the chart version.
2. Make sure your repo is clean by git's standards
3. Create the tag and push it `git checkout release-1.18; CHART_VERSION=chart-0.18.0; git tag $CHART_VERSION; git push origin $CHART_VERSION`
4. Verify the new helm artifact has been successfully pushed to the `gh-pages` branch
3. Follow the release-branch or patch release tagging pattern from the above section.
4. Verify the new helm artifact has been successfully pushed to the `gh-pages` branch

View File

@@ -1,9 +1,25 @@
# User Guide
Starting with descheduler release v0.10.0 container images are available in these container registries.
* `asia.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
* `eu.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
* `us.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
Starting with descheduler release v0.10.0 container images are available in the official k8s container registry.
Descheduler Version | Container Image | Architectures |
------------------- |--------------------------------------------|-------------------------|
v0.23.0 | k8s.gcr.io/descheduler/descheduler:v0.23.0 | AMD64<br>ARM64<br>ARMv7 |
v0.22.0 | k8s.gcr.io/descheduler/descheduler:v0.22.0 | AMD64<br>ARM64<br>ARMv7 |
v0.21.0 | k8s.gcr.io/descheduler/descheduler:v0.21.0 | AMD64<br>ARM64<br>ARMv7 |
v0.20.0 | k8s.gcr.io/descheduler/descheduler:v0.20.0 | AMD64<br>ARM64 |
v0.19.0 | k8s.gcr.io/descheduler/descheduler:v0.19.0 | AMD64 |
v0.18.0 | k8s.gcr.io/descheduler/descheduler:v0.18.0 | AMD64 |
v0.10.0 | k8s.gcr.io/descheduler/descheduler:v0.10.0 | AMD64 |
Note that multi-arch container images cannot be pulled by [kind](https://kind.sigs.k8s.io) from a registry. Therefore
starting with descheduler release v0.20.0 use the below process to download the official descheduler
image into a kind cluster.
```
kind create cluster
docker pull k8s.gcr.io/descheduler/descheduler:v0.20.0
kind load docker-image k8s.gcr.io/descheduler/descheduler:v0.20.0
```
## Policy Configuration Examples
The [examples](https://github.com/kubernetes-sigs/descheduler/tree/master/examples) directory has descheduler policy configuration examples.
@@ -23,11 +39,11 @@ Available Commands:
version Version of descheduler
Flags:
--add-dir-header If true, adds the file directory to the header
--add-dir-header If true, adds the file directory to the header of the log messages
--alsologtostderr log to standard error as well as files
--descheduling-interval duration Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.
--dry-run execute descheduler in dry run mode.
--evict-local-storage-pods Enables evicting pods using local storage by descheduler
--evict-local-storage-pods DEPRECATED: enables evicting pods using local storage by descheduler
-h, --help help for descheduler
--kubeconfig string File with kube configuration.
--log-backtrace-at traceLocation when logging hits line file:N, emit a stack trace (default :0)
@@ -36,8 +52,8 @@ Flags:
--log-file-max-size uint Defines the maximum size a log file can grow to. Unit is megabytes. If the value is 0, the maximum file size is unlimited. (default 1800)
--log-flush-frequency duration Maximum number of seconds between log flushes (default 5s)
--logtostderr log to standard error instead of files (default true)
--max-pods-to-evict-per-node int Limits the maximum number of pods to be evicted per node by descheduler
--node-selector string Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)
--max-pods-to-evict-per-node int DEPRECATED: limits the maximum number of pods to be evicted per node by descheduler
--node-selector string DEPRECATED: selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)
--policy-config-file string File with descheduler policy configuration.
--skip-headers If true, avoid header prefixes in the log messages
--skip-log-headers If true, avoid headers when opening log files
@@ -71,20 +87,57 @@ This policy configuration file ensures that pods created more than 7 days ago ar
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: false
"RemoveDuplicates":
enabled: false
"RemovePodsViolatingInterPodAntiAffinity":
enabled: false
"RemovePodsViolatingNodeAffinity":
enabled: false
"RemovePodsViolatingNodeTaints":
enabled: false
"RemovePodsHavingTooManyRestarts":
enabled: false
"PodLifeTime":
enabled: true
params:
maxPodLifeTimeSeconds: 604800 # pods run for a maximum of 7 days
```
### Balance Cluster By Node Memory Utilization
If your cluster has been running for a long period of time, you may find that the resource utilization is not very
balanced. The following two strategies can be used to rebalance your cluster based on `cpu`, `memory`
or `number of pods`.
#### Balance high utilization nodes
Using `LowNodeUtilization`, descheduler will rebalance the cluster based on memory by evicting pods
from nodes with memory utilization over 70% to nodes with memory utilization below 20%.
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
targetThresholds:
"memory": 70
```
#### Balance low utilization nodes
Using `HighNodeUtilization`, descheduler will rebalance the cluster based on memory by evicting pods
from nodes with memory utilization lower than 20%. This should be used along with scheduler strategy `MostRequestedPriority`.
The evicted pods will be compacted into minimal set of nodes.
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
```
### Autoheal Node Problems
Descheduler's `RemovePodsViolatingNodeTaints` strategy can be combined with
[Node Problem Detector](https://github.com/kubernetes/node-problem-detector/) and
[Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) to automatically remove
Nodes which have problems. Node Problem Detector can detect specific Node problems and taint any Nodes which have those
problems. The Descheduler will then deschedule workloads from those Nodes. Finally, if the descheduled Node's resource
allocation falls below the Cluster Autoscaler's scale down threshold, the Node will become a scale down candidate
and can be removed by Cluster Autoscaler. These three components form an autohealing cycle for Node problems.

14
examples/failed-pods.yaml Normal file
View File

@@ -0,0 +1,14 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemoveFailedPods":
enabled: true
params:
failedPods:
reasons:
- "OutOfcpu"
- "CreateContainerConfigError"
includingInitContainers: true
excludeOwnerKinds:
- "Job"
minPodLifetimeSeconds: 3600 # 1 hour

View File

@@ -0,0 +1,9 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20

View File

@@ -0,0 +1,11 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
targetThresholds:
"memory": 70

View File

@@ -1,20 +1,8 @@
---
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: false
"RemoveDuplicates":
enabled: false
"RemovePodsViolatingInterPodAntiAffinity":
enabled: false
"RemovePodsViolatingNodeAffinity":
enabled: false
"RemovePodsViolatingNodeTaints":
enabled: false
"RemovePodsHavingTooManyRestarts":
enabled: false
"PodLifeTime":
enabled: true
params:
maxPodLifeTimeSeconds: 604800 # 7 days
podLifeTime:
maxPodLifeTimeSeconds: 604800 # 7 days

View File

@@ -23,3 +23,7 @@ strategies:
podsHavingTooManyRestarts:
podRestartThreshold: 100
includingInitContainers: true
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
includeSoftConstraints: true

View File

@@ -0,0 +1,7 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
includeSoftConstraints: true # Include 'ScheduleAnyways' constraints

120
go.mod
View File

@@ -1,15 +1,117 @@
module sigs.k8s.io/descheduler
go 1.14
go 1.17
require (
github.com/spf13/cobra v0.0.5
github.com/client9/misspell v0.3.4
github.com/spf13/cobra v1.2.1
github.com/spf13/pflag v1.0.5
k8s.io/api v0.18.4
k8s.io/apimachinery v0.18.4
k8s.io/apiserver v0.18.4
k8s.io/client-go v0.18.4
k8s.io/code-generator v0.18.4
k8s.io/component-base v0.18.4
k8s.io/klog/v2 v2.0.0
k8s.io/api v0.23.0
k8s.io/apimachinery v0.23.0
k8s.io/apiserver v0.23.0
k8s.io/client-go v0.23.0
k8s.io/code-generator v0.23.0
k8s.io/component-base v0.23.0
k8s.io/component-helpers v0.23.0
k8s.io/klog/v2 v2.30.0
k8s.io/kubectl v0.20.5
sigs.k8s.io/mdtoc v1.0.1
)
require (
cloud.google.com/go v0.81.0 // indirect
github.com/Azure/go-autorest v14.2.0+incompatible // indirect
github.com/Azure/go-autorest/autorest v0.11.18 // indirect
github.com/Azure/go-autorest/autorest/adal v0.9.13 // indirect
github.com/Azure/go-autorest/autorest/date v0.3.0 // indirect
github.com/Azure/go-autorest/logger v0.2.1 // indirect
github.com/Azure/go-autorest/tracing v0.6.0 // indirect
github.com/BurntSushi/toml v0.3.1 // indirect
github.com/NYTimes/gziphandler v1.1.1 // indirect
github.com/PuerkitoBio/purell v1.1.1 // indirect
github.com/PuerkitoBio/urlesc v0.0.0-20170810143723-de5bf2ad4578 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver v3.5.1+incompatible // indirect
github.com/cespare/xxhash/v2 v2.1.1 // indirect
github.com/coreos/go-semver v0.3.0 // indirect
github.com/coreos/go-systemd/v22 v22.3.2 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful v2.9.5+incompatible // indirect
github.com/evanphx/json-patch v4.12.0+incompatible // indirect
github.com/felixge/httpsnoop v1.0.1 // indirect
github.com/form3tech-oss/jwt-go v3.2.3+incompatible // indirect
github.com/fsnotify/fsnotify v1.4.9 // indirect
github.com/go-logr/logr v1.2.0 // indirect
github.com/go-logr/zapr v1.2.0 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.19.5 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/gomarkdown/markdown v0.0.0-20200824053859-8c8b3816f167 // indirect
github.com/google/go-cmp v0.5.5 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/google/uuid v1.1.2 // indirect
github.com/googleapis/gnostic v0.5.5 // indirect
github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0 // indirect
github.com/grpc-ecosystem/grpc-gateway v1.16.0 // indirect
github.com/imdario/mergo v0.3.5 // indirect
github.com/inconshreveable/mousetrap v1.0.0 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.2-0.20181231171920-c182affec369 // indirect
github.com/mmarkdown/mmark v2.0.40+incompatible // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_golang v1.11.0 // indirect
github.com/prometheus/client_model v0.2.0 // indirect
github.com/prometheus/common v0.28.0 // indirect
github.com/prometheus/procfs v0.6.0 // indirect
go.etcd.io/etcd/api/v3 v3.5.0 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.5.0 // indirect
go.etcd.io/etcd/client/v3 v3.5.0 // indirect
go.opentelemetry.io/contrib v0.20.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.20.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.20.0 // indirect
go.opentelemetry.io/otel v0.20.0 // indirect
go.opentelemetry.io/otel/exporters/otlp v0.20.0 // indirect
go.opentelemetry.io/otel/metric v0.20.0 // indirect
go.opentelemetry.io/otel/sdk v0.20.0 // indirect
go.opentelemetry.io/otel/sdk/export/metric v0.20.0 // indirect
go.opentelemetry.io/otel/sdk/metric v0.20.0 // indirect
go.opentelemetry.io/otel/trace v0.20.0 // indirect
go.opentelemetry.io/proto/otlp v0.7.0 // indirect
go.uber.org/atomic v1.7.0 // indirect
go.uber.org/multierr v1.6.0 // indirect
go.uber.org/zap v1.19.0 // indirect
golang.org/x/crypto v0.0.0-20210817164053-32db794688a5 // indirect
golang.org/x/mod v0.4.2 // indirect
golang.org/x/net v0.0.0-20210825183410-e898025ed96a // indirect
golang.org/x/oauth2 v0.0.0-20210819190943-2bc19b11175f // indirect
golang.org/x/sync v0.0.0-20210220032951-036812b2e83c // indirect
golang.org/x/sys v0.0.0-20210831042530-f4d43177bf5e // indirect
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b // indirect
golang.org/x/text v0.3.7 // indirect
golang.org/x/time v0.0.0-20210723032227-1f47c861a9ac // indirect
golang.org/x/tools v0.1.6-0.20210820212750-d4cc65f0b2ff // indirect
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20210831024726-fe130286e0e2 // indirect
google.golang.org/grpc v1.40.0 // indirect
google.golang.org/protobuf v1.27.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.0.0 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b // indirect
k8s.io/gengo v0.0.0-20210813121822-485abfe95c7c // indirect
k8s.io/kube-openapi v0.0.0-20211115234752-e816edb12b65 // indirect
k8s.io/utils v0.0.0-20210930125809-cb0fa318a74b // indirect
sigs.k8s.io/apiserver-network-proxy/konnectivity-client v0.0.25 // indirect
sigs.k8s.io/json v0.0.0-20211020170558-c049b76a60c6 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.2.0 // indirect
sigs.k8s.io/yaml v1.2.0 // indirect
)

972
go.sum

File diff suppressed because it is too large Load Diff

6
hack/.spelling_failures Normal file
View File

@@ -0,0 +1,6 @@
BUILD
CHANGELOG
OWNERS
go.mod
go.sum
vendor/

View File

@@ -18,15 +18,15 @@ E2E_GCE_HOME=$DESCHEDULER_ROOT/hack/e2e-gce
create_cluster() {
echo "#################### Creating instances ##########################"
gcloud compute instances create descheduler-$master_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
gcloud compute instances create descheduler-$master_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
# Keeping the --zone here so as to make sure that e2e's can run locally.
echo "gcloud compute instances delete descheduler-$master_uuid --zone=us-east1-b --quiet" > $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node1_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
gcloud compute instances create descheduler-$node1_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node1_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node2_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node2_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-c --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
# Delete the firewall port created for master.
echo "gcloud compute firewall-rules delete kubeapiserver-$master_uuid --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
@@ -44,10 +44,10 @@ generate_kubeadm_instance_files() {
transfer_install_files() {
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_install.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c
}
@@ -55,19 +55,19 @@ install_kube() {
# Docker installation.
gcloud compute ssh descheduler-$master_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
gcloud compute ssh descheduler-$node1_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-c
# kubeadm installation.
# 1. Transfer files to master, nodes.
transfer_install_files
# 2. Install kubeadm.
#TODO: Add rm /tmp/kubeadm_install.sh
# Open port for kube API server
gcloud compute firewall-rules create kubeapiserver-$master_uuid --allow tcp:6443 --source-tags=descheduler-$master_uuid --source-ranges=0.0.0.0/0 --description="Opening api server port"
gcloud compute firewall-rules create kubeapiserver-$master_uuid --allow tcp:6443 --source-tags=descheduler-$master_uuid --source-ranges=0.0.0.0/0 --description="Opening api server port"
gcloud compute ssh descheduler-$master_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
kubeadm_join_command=$(gcloud compute ssh descheduler-$master_uuid --command "sudo chmod 755 /tmp/kubeadm_install.sh; sudo /tmp/kubeadm_install.sh" --zone=us-east1-b|grep 'kubeadm join')
# Copy the kubeconfig file onto /tmp for e2e tests.
# Copy the kubeconfig file onto /tmp for e2e tests.
gcloud compute ssh descheduler-$master_uuid --command "sudo cp /etc/kubernetes/admin.conf /tmp; sudo chmod 777 /tmp/admin.conf" --zone=us-east1-b
gcloud compute scp descheduler-$master_uuid:/tmp/admin.conf /tmp/admin.conf --zone=us-east1-b
@@ -75,16 +75,15 @@ install_kube() {
gcloud compute ssh descheduler-$master_uuid --command "sudo kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter.yaml --kubeconfig /etc/kubernetes/admin.conf" --zone=us-east1-b
echo $kubeadm_join_command > $E2E_GCE_HOME/kubeadm_join.sh
# Copy kubeadm_join to every node.
# Copy kubeadm_join to every node.
#TODO: Put these in a loop, so that extension becomes possible.
gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-c
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-c
}

View File

@@ -1,6 +1,18 @@
kind: Cluster
apiVersion: kind.sigs.k8s.io/v1alpha3
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
kubeadmConfigPatches:
- |
kind: JoinConfiguration
nodeRegistration:
kubeletExtraArgs:
node-labels: "topology.kubernetes.io/zone=local-a"
- role: worker
kubeadmConfigPatches:
- |
kind: JoinConfiguration
nodeRegistration:
kubeletExtraArgs:
node-labels: "topology.kubernetes.io/zone=local-b"

View File

@@ -1,3 +1,4 @@
//go:build tools
// +build tools
/*
@@ -19,4 +20,8 @@ limitations under the License.
// This package imports things required by build scripts, to force `go mod` to see them as dependencies
package tools
import _ "k8s.io/code-generator"
import (
_ "github.com/client9/misspell/cmd/misspell"
_ "k8s.io/code-generator"
_ "sigs.k8s.io/mdtoc"
)

View File

@@ -23,7 +23,7 @@ DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
GO_VERSION=($(go version))
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9|go1.10|go1.11|go1.12|go1.13|go1.14') ]]; then
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.14|go1.15|go1.16|go1.17') ]]; then
echo "Unknown go version '${GO_VERSION[2]}', skipping gofmt."
exit 1
fi

25
hack/update-toc.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go build -o "${OS_OUTPUT_BINPATH}/mdtoc" "sigs.k8s.io/mdtoc"
${OS_OUTPUT_BINPATH}/mdtoc --inplace README.md

36
hack/verify-conversions.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
mkdir -p "${DESCHEDULER_ROOT}/_tmp"
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/conversion-gen" "k8s.io/code-generator/cmd/conversion-gen"
${OS_OUTPUT_BINPATH}/conversion-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "./pkg/apis/componentconfig/v1alpha1,./pkg/api/v1alpha1" \
--output-file-base zz_generated.conversion
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated output differs:" >&2
echo "${_out}" >&2
echo "Generated conversions verify failed. Please run ./hack/update-generated-conversions.sh"
exit 1
fi
popd > /dev/null 2>&1
echo "Generated conversions verified."

36
hack/verify-deep-copies.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
mkdir -p "${DESCHEDULER_ROOT}/_tmp"
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/deepcopy-gen" "k8s.io/code-generator/cmd/deepcopy-gen"
${OS_OUTPUT_BINPATH}/deepcopy-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "./pkg/apis/componentconfig,./pkg/apis/componentconfig/v1alpha1,./pkg/api,./pkg/api/v1alpha1" \
--output-file-base zz_generated.deepcopy
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated deep-copies output differs:" >&2
echo "${_out}" >&2
echo "Generated deep-copies verify failed. Please run ./hack/update-generated-deep-copies.sh"
exit 1
fi
popd > /dev/null 2>&1
echo "Generated deep-copies verified."

35
hack/verify-defaulters.sh Executable file
View File

@@ -0,0 +1,35 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/defaulter-gen" "k8s.io/code-generator/cmd/defaulter-gen"
${OS_OUTPUT_BINPATH}/defaulter-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \
--extra-peer-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \
--output-file-base zz_generated.defaults
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated defaulters output differs:" >&2
echo "${_out}" >&2
echo "Generated defaulters verify failed. Please run ./hack/update-generated-defaulters.sh"
fi
popd > /dev/null 2>&1
echo "Generated Defaulters verified."

View File

@@ -23,7 +23,7 @@ DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
GO_VERSION=($(go version))
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9|go1.10|go1.11|go1.12|go1.13|go1.14') ]]; then
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.14|go1.15|go1.16|go1.17') ]]; then
echo "Unknown go version '${GO_VERSION[2]}', skipping gofmt."
exit 1
fi

19
hack/verify-govet.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go vet ${OS_ROOT}/...

41
hack/verify-spelling.sh Executable file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script checks commonly misspelled English words in all files in the
# working directory by client9/misspell package.
# Usage: `hack/verify-spelling.sh`.
set -o errexit
set -o nounset
set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
export KUBE_ROOT
source "${KUBE_ROOT}/hack/lib/init.sh"
# Ensure that we find the binaries we build before anything else.
export GOBIN="${OS_OUTPUT_BINPATH}"
PATH="${GOBIN}:${PATH}"
# Install tools we need
pushd "${KUBE_ROOT}" >/dev/null
GO111MODULE=on go install github.com/client9/misspell/cmd/misspell
popd >/dev/null
# Spell checking
# All the skipping files are defined in hack/.spelling_failures
skipping_file="${KUBE_ROOT}/hack/.spelling_failures"
failing_packages=$(sed "s| | -e |g" "${skipping_file}")
git ls-files | grep -v -e "${failing_packages}" | xargs misspell -i "Creater,creater,ect" -error -o stderr

29
hack/verify-toc.sh Executable file
View File

@@ -0,0 +1,29 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go build -o "${OS_OUTPUT_BINPATH}/mdtoc" "sigs.k8s.io/mdtoc"
if ! ${OS_OUTPUT_BINPATH}/mdtoc --inplace --dryrun README.md
then
echo "ERROR: Changes detected to table of contents. Run ./hack/update-toc.sh" >&2
exit 1
fi

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- configmap.yaml
- rbac.yaml

View File

@@ -3,7 +3,6 @@ kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: descheduler-cluster-role
namespace: kube-system
rules:
- apiGroups: [""]
resources: ["events"]
@@ -11,12 +10,18 @@ rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
---
apiVersion: v1
kind: ServiceAccount
@@ -28,7 +33,6 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: descheduler-cluster-role-binding
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole

View File

@@ -1,5 +1,5 @@
---
apiVersion: batch/v1beta1
apiVersion: batch/v1 # for k8s version < 1.21.0, use batch/v1beta1
kind: CronJob
metadata:
name: descheduler-cronjob
@@ -16,7 +16,7 @@ spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler:v0.18.0
image: k8s.gcr.io/descheduler/descheduler:v0.23.0
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
@@ -27,6 +27,26 @@ spec:
- "/policy-dir/policy.yaml"
- "--v"
- "3"
resources:
requests:
cpu: "500m"
memory: "256Mi"
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 3
periodSeconds: 10
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
restartPolicy: "Never"
serviceAccountName: descheduler-sa
volumes:

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- cronjob.yaml

View File

@@ -0,0 +1,62 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: descheduler
namespace: kube-system
labels:
app: descheduler
spec:
replicas: 1
selector:
matchLabels:
app: descheduler
template:
metadata:
labels:
app: descheduler
spec:
priorityClassName: system-cluster-critical
serviceAccountName: descheduler-sa
containers:
- name: descheduler
image: k8s.gcr.io/descheduler/descheduler:v0.23.0
imagePullPolicy: IfNotPresent
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--descheduling-interval"
- "5m"
- "--v"
- "3"
ports:
- containerPort: 10258
protocol: TCP
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 3
periodSeconds: 10
resources:
requests:
cpu: 500m
memory: 256Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
volumes:
- name: policy-volume
configMap:
name: descheduler-policy-configmap

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- deployment.yaml

View File

@@ -14,7 +14,7 @@ spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler:v0.18.0
image: k8s.gcr.io/descheduler/descheduler:v0.23.0
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
@@ -25,6 +25,26 @@ spec:
- "/policy-dir/policy.yaml"
- "--v"
- "3"
resources:
requests:
cpu: "500m"
memory: "256Mi"
livenessProbe:
failureThreshold: 3
httpGet:
path: /healthz
port: 10258
scheme: HTTPS
initialDelaySeconds: 3
periodSeconds: 10
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
restartPolicy: "Never"
serviceAccountName: descheduler-sa
volumes:

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- job.yaml

72
metrics/metrics.go Normal file
View File

@@ -0,0 +1,72 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"sigs.k8s.io/descheduler/pkg/version"
)
const (
// DeschedulerSubsystem - subsystem name used by descheduler
DeschedulerSubsystem = "descheduler"
)
var (
PodsEvicted = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: DeschedulerSubsystem,
Name: "pods_evicted",
Help: "Number of evicted pods, by the result, by the strategy, by the namespace, by the node name. 'error' result means a pod could not be evicted",
StabilityLevel: metrics.ALPHA,
}, []string{"result", "strategy", "namespace", "node"})
buildInfo = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: DeschedulerSubsystem,
Name: "build_info",
Help: "Build info about descheduler, including Go version, Descheduler version, Git SHA, Git branch",
ConstLabels: map[string]string{"GoVersion": version.Get().GoVersion, "DeschedulerVersion": version.Get().GitVersion, "GitBranch": version.Get().GitBranch, "GitSha1": version.Get().GitSha1},
StabilityLevel: metrics.ALPHA,
},
)
metricsList = []metrics.Registerable{
PodsEvicted,
buildInfo,
}
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
RegisterMetrics(metricsList...)
})
}
// RegisterMetrics registers a list of metrics.
func RegisterMetrics(extraMetrics ...metrics.Registerable) {
for _, metric := range extraMetrics {
legacyregistry.MustRegister(metric)
}
}

View File

@@ -19,8 +19,6 @@ package api
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
var (
@@ -35,12 +33,6 @@ const GroupName = "descheduler"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
func init() {
if err := addKnownTypes(scheme.Scheme); err != nil {
panic(err)
}
}
// Kind takes an unqualified kind and returns a Group qualified GroupKind
func Kind(kind string) schema.GroupKind {
return SchemeGroupVersion.WithKind(kind).GroupKind()

View File

@@ -28,6 +28,27 @@ type DeschedulerPolicy struct {
// Strategies
Strategies StrategyList
// NodeSelector for a set of nodes to operate over
NodeSelector *string
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
EvictFailedBarePods *bool
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
EvictSystemCriticalPods *bool
// IgnorePVCPods prevents pods with PVCs from being evicted.
IgnorePVCPods *bool
// MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node.
MaxNoOfPodsToEvictPerNode *uint
// MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace.
MaxNoOfPodsToEvictPerNamespace *uint
}
type StrategyName string
@@ -44,13 +65,29 @@ type DeschedulerStrategy struct {
Params *StrategyParameters
}
// Only one of its members may be specified
// Namespaces carries a list of included/excluded namespaces
// for which a given strategy is applicable
type Namespaces struct {
Include []string
Exclude []string
}
// Besides Namespaces only one of its members may be specified
// TODO(jchaloup): move Namespaces ThresholdPriority and ThresholdPriorityClassName to individual strategies
// once the policy version is bumped to v1alpha2
type StrategyParameters struct {
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds
NodeAffinityType []string
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts
MaxPodLifeTimeSeconds *uint
PodLifeTime *PodLifeTime
RemoveDuplicates *RemoveDuplicates
FailedPods *FailedPods
IncludeSoftConstraints bool
Namespaces *Namespaces
ThresholdPriority *int32
ThresholdPriorityClassName string
LabelSelector *metav1.LabelSelector
NodeFit bool
}
type Percentage float64
@@ -70,3 +107,15 @@ type PodsHavingTooManyRestarts struct {
type RemoveDuplicates struct {
ExcludeOwnerKinds []string
}
type PodLifeTime struct {
MaxPodLifeTimeSeconds *uint
PodStatusPhases []string
}
type FailedPods struct {
ExcludeOwnerKinds []string
MinPodLifetimeSeconds *uint
Reasons []string
IncludingInitContainers bool
}

View File

@@ -28,6 +28,27 @@ type DeschedulerPolicy struct {
// Strategies
Strategies StrategyList `json:"strategies,omitempty"`
// NodeSelector for a set of nodes to operate over
NodeSelector *string `json:"nodeSelector,omitempty"`
// EvictFailedBarePods allows pods without ownerReferences and in failed phase to be evicted.
EvictFailedBarePods *bool `json:"evictFailedBarePods,omitempty"`
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool `json:"evictLocalStoragePods,omitempty"`
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
EvictSystemCriticalPods *bool `json:"evictSystemCriticalPods,omitempty"`
// IgnorePVCPods prevents pods with PVCs from being evicted.
IgnorePVCPods *bool `json:"ignorePvcPods,omitempty"`
// MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node.
MaxNoOfPodsToEvictPerNode *int `json:"maxNoOfPodsToEvictPerNode,omitempty"`
// MaxNoOfPodsToEvictPerNamespace restricts maximum of pods to be evicted per namespace.
MaxNoOfPodsToEvictPerNamespace *int `json:"maxNoOfPodsToEvictPerNamespace,omitempty"`
}
type StrategyName string
@@ -44,13 +65,27 @@ type DeschedulerStrategy struct {
Params *StrategyParameters `json:"params,omitempty"`
}
// Only one of its members may be specified
// Namespaces carries a list of included/excluded namespaces
// for which a given strategy is applicable.
type Namespaces struct {
Include []string `json:"include"`
Exclude []string `json:"exclude"`
}
// Besides Namespaces ThresholdPriority and ThresholdPriorityClassName only one of its members may be specified
type StrategyParameters struct {
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"`
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"`
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
PodLifeTime *PodLifeTime `json:"podLifeTime,omitempty"`
RemoveDuplicates *RemoveDuplicates `json:"removeDuplicates,omitempty"`
FailedPods *FailedPods `json:"failedPods,omitempty"`
IncludeSoftConstraints bool `json:"includeSoftConstraints"`
Namespaces *Namespaces `json:"namespaces"`
ThresholdPriority *int32 `json:"thresholdPriority"`
ThresholdPriorityClassName string `json:"thresholdPriorityClassName"`
LabelSelector *metav1.LabelSelector `json:"labelSelector"`
NodeFit bool `json:"nodeFit"`
}
type Percentage float64
@@ -70,3 +105,15 @@ type PodsHavingTooManyRestarts struct {
type RemoveDuplicates struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
}
type PodLifeTime struct {
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
PodStatusPhases []string `json:"podStatusPhases,omitempty"`
}
type FailedPods struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds,omitempty"`
Reasons []string `json:"reasons,omitempty"`
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
}

View File

@@ -1,7 +1,8 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -23,6 +24,7 @@ package v1alpha1
import (
unsafe "unsafe"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
conversion "k8s.io/apimachinery/pkg/conversion"
runtime "k8s.io/apimachinery/pkg/runtime"
api "sigs.k8s.io/descheduler/pkg/api"
@@ -55,6 +57,26 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*FailedPods)(nil), (*api.FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_FailedPods_To_api_FailedPods(a.(*FailedPods), b.(*api.FailedPods), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.FailedPods)(nil), (*FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_FailedPods_To_v1alpha1_FailedPods(a.(*api.FailedPods), b.(*FailedPods), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*Namespaces)(nil), (*api.Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_Namespaces_To_api_Namespaces(a.(*Namespaces), b.(*api.Namespaces), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.Namespaces)(nil), (*Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_Namespaces_To_v1alpha1_Namespaces(a.(*api.Namespaces), b.(*Namespaces), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*NodeResourceUtilizationThresholds)(nil), (*api.NodeResourceUtilizationThresholds)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(a.(*NodeResourceUtilizationThresholds), b.(*api.NodeResourceUtilizationThresholds), scope)
}); err != nil {
@@ -65,6 +87,16 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*PodLifeTime)(nil), (*api.PodLifeTime)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime(a.(*PodLifeTime), b.(*api.PodLifeTime), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.PodLifeTime)(nil), (*PodLifeTime)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime(a.(*api.PodLifeTime), b.(*PodLifeTime), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*PodsHavingTooManyRestarts)(nil), (*api.PodsHavingTooManyRestarts)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(a.(*PodsHavingTooManyRestarts), b.(*api.PodsHavingTooManyRestarts), scope)
}); err != nil {
@@ -100,6 +132,25 @@ func RegisterConversions(s *runtime.Scheme) error {
func autoConvert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *DeschedulerPolicy, out *api.DeschedulerPolicy, s conversion.Scope) error {
out.Strategies = *(*api.StrategyList)(unsafe.Pointer(&in.Strategies))
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
out.EvictFailedBarePods = (*bool)(unsafe.Pointer(in.EvictFailedBarePods))
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(uint)
**out = uint(**in)
} else {
out.MaxNoOfPodsToEvictPerNode = nil
}
if in.MaxNoOfPodsToEvictPerNamespace != nil {
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
*out = new(uint)
**out = uint(**in)
} else {
out.MaxNoOfPodsToEvictPerNamespace = nil
}
return nil
}
@@ -110,6 +161,25 @@ func Convert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *Descheduler
func autoConvert_api_DeschedulerPolicy_To_v1alpha1_DeschedulerPolicy(in *api.DeschedulerPolicy, out *DeschedulerPolicy, s conversion.Scope) error {
out.Strategies = *(*StrategyList)(unsafe.Pointer(&in.Strategies))
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
out.EvictFailedBarePods = (*bool)(unsafe.Pointer(in.EvictFailedBarePods))
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(int)
**out = int(**in)
} else {
out.MaxNoOfPodsToEvictPerNode = nil
}
if in.MaxNoOfPodsToEvictPerNamespace != nil {
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
*out = new(int)
**out = int(**in)
} else {
out.MaxNoOfPodsToEvictPerNamespace = nil
}
return nil
}
@@ -142,6 +212,54 @@ func Convert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in *api.Des
return autoConvert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in, out, s)
}
func autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
out.IncludingInitContainers = in.IncludingInitContainers
return nil
}
// Convert_v1alpha1_FailedPods_To_api_FailedPods is an autogenerated conversion function.
func Convert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
return autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in, out, s)
}
func autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
out.IncludingInitContainers = in.IncludingInitContainers
return nil
}
// Convert_api_FailedPods_To_v1alpha1_FailedPods is an autogenerated conversion function.
func Convert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
return autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in, out, s)
}
func autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
return nil
}
// Convert_v1alpha1_Namespaces_To_api_Namespaces is an autogenerated conversion function.
func Convert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
return autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in, out, s)
}
func autoConvert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error {
out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
return nil
}
// Convert_api_Namespaces_To_v1alpha1_Namespaces is an autogenerated conversion function.
func Convert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error {
return autoConvert_api_Namespaces_To_v1alpha1_Namespaces(in, out, s)
}
func autoConvert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(in *NodeResourceUtilizationThresholds, out *api.NodeResourceUtilizationThresholds, s conversion.Scope) error {
out.Thresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.Thresholds))
out.TargetThresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.TargetThresholds))
@@ -166,6 +284,28 @@ func Convert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtili
return autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in, out, s)
}
func autoConvert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in *PodLifeTime, out *api.PodLifeTime, s conversion.Scope) error {
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.PodStatusPhases = *(*[]string)(unsafe.Pointer(&in.PodStatusPhases))
return nil
}
// Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime is an autogenerated conversion function.
func Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in *PodLifeTime, out *api.PodLifeTime, s conversion.Scope) error {
return autoConvert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in, out, s)
}
func autoConvert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in *api.PodLifeTime, out *PodLifeTime, s conversion.Scope) error {
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.PodStatusPhases = *(*[]string)(unsafe.Pointer(&in.PodStatusPhases))
return nil
}
// Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime is an autogenerated conversion function.
func Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in *api.PodLifeTime, out *PodLifeTime, s conversion.Scope) error {
return autoConvert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in, out, s)
}
func autoConvert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in *PodsHavingTooManyRestarts, out *api.PodsHavingTooManyRestarts, s conversion.Scope) error {
out.PodRestartThreshold = in.PodRestartThreshold
out.IncludingInitContainers = in.IncludingInitContainers
@@ -212,8 +352,15 @@ func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *Strat
out.NodeResourceUtilizationThresholds = (*api.NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
out.PodsHavingTooManyRestarts = (*api.PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.PodLifeTime = (*api.PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
out.RemoveDuplicates = (*api.RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
out.FailedPods = (*api.FailedPods)(unsafe.Pointer(in.FailedPods))
out.IncludeSoftConstraints = in.IncludeSoftConstraints
out.Namespaces = (*api.Namespaces)(unsafe.Pointer(in.Namespaces))
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
out.LabelSelector = (*v1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
out.NodeFit = in.NodeFit
return nil
}
@@ -226,8 +373,15 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
out.NodeResourceUtilizationThresholds = (*NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
out.PodsHavingTooManyRestarts = (*PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.PodLifeTime = (*PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
out.RemoveDuplicates = (*RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
out.FailedPods = (*FailedPods)(unsafe.Pointer(in.FailedPods))
out.IncludeSoftConstraints = in.IncludeSoftConstraints
out.Namespaces = (*Namespaces)(unsafe.Pointer(in.Namespaces))
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
out.LabelSelector = (*v1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
out.NodeFit = in.NodeFit
return nil
}

View File

@@ -1,7 +1,8 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -21,6 +22,7 @@ limitations under the License.
package v1alpha1
import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
@@ -35,6 +37,41 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
(*out)[key] = *val.DeepCopy()
}
}
if in.NodeSelector != nil {
in, out := &in.NodeSelector, &out.NodeSelector
*out = new(string)
**out = **in
}
if in.EvictFailedBarePods != nil {
in, out := &in.EvictFailedBarePods, &out.EvictFailedBarePods
*out = new(bool)
**out = **in
}
if in.EvictLocalStoragePods != nil {
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
*out = new(bool)
**out = **in
}
if in.EvictSystemCriticalPods != nil {
in, out := &in.EvictSystemCriticalPods, &out.EvictSystemCriticalPods
*out = new(bool)
**out = **in
}
if in.IgnorePVCPods != nil {
in, out := &in.IgnorePVCPods, &out.IgnorePVCPods
*out = new(bool)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(int)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNamespace != nil {
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
*out = new(int)
**out = **in
}
return
}
@@ -77,6 +114,63 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FailedPods) DeepCopyInto(out *FailedPods) {
*out = *in
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.MinPodLifetimeSeconds != nil {
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
*out = new(uint)
**out = **in
}
if in.Reasons != nil {
in, out := &in.Reasons, &out.Reasons
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
func (in *FailedPods) DeepCopy() *FailedPods {
if in == nil {
return nil
}
out := new(FailedPods)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Namespaces) DeepCopyInto(out *Namespaces) {
*out = *in
if in.Include != nil {
in, out := &in.Include, &out.Include
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.Exclude != nil {
in, out := &in.Exclude, &out.Exclude
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces.
func (in *Namespaces) DeepCopy() *Namespaces {
if in == nil {
return nil
}
out := new(Namespaces)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeResourceUtilizationThresholds) DeepCopyInto(out *NodeResourceUtilizationThresholds) {
*out = *in
@@ -107,6 +201,32 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) {
*out = *in
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.PodStatusPhases != nil {
in, out := &in.PodStatusPhases, &out.PodStatusPhases
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTime.
func (in *PodLifeTime) DeepCopy() *PodLifeTime {
if in == nil {
return nil
}
out := new(PodLifeTime)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
*out = *in
@@ -206,16 +326,36 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = new(PodsHavingTooManyRestarts)
**out = **in
}
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
if in.PodLifeTime != nil {
in, out := &in.PodLifeTime, &out.PodLifeTime
*out = new(PodLifeTime)
(*in).DeepCopyInto(*out)
}
if in.RemoveDuplicates != nil {
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
*out = new(RemoveDuplicates)
(*in).DeepCopyInto(*out)
}
if in.FailedPods != nil {
in, out := &in.FailedPods, &out.FailedPods
*out = new(FailedPods)
(*in).DeepCopyInto(*out)
}
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(Namespaces)
(*in).DeepCopyInto(*out)
}
if in.ThresholdPriority != nil {
in, out := &in.ThresholdPriority, &out.ThresholdPriority
*out = new(int32)
**out = **in
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
return
}

View File

@@ -1,7 +1,8 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -1,7 +1,8 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -21,6 +22,7 @@ limitations under the License.
package api
import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
@@ -35,6 +37,41 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
(*out)[key] = *val.DeepCopy()
}
}
if in.NodeSelector != nil {
in, out := &in.NodeSelector, &out.NodeSelector
*out = new(string)
**out = **in
}
if in.EvictFailedBarePods != nil {
in, out := &in.EvictFailedBarePods, &out.EvictFailedBarePods
*out = new(bool)
**out = **in
}
if in.EvictLocalStoragePods != nil {
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
*out = new(bool)
**out = **in
}
if in.EvictSystemCriticalPods != nil {
in, out := &in.EvictSystemCriticalPods, &out.EvictSystemCriticalPods
*out = new(bool)
**out = **in
}
if in.IgnorePVCPods != nil {
in, out := &in.IgnorePVCPods, &out.IgnorePVCPods
*out = new(bool)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(uint)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNamespace != nil {
in, out := &in.MaxNoOfPodsToEvictPerNamespace, &out.MaxNoOfPodsToEvictPerNamespace
*out = new(uint)
**out = **in
}
return
}
@@ -77,6 +114,63 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FailedPods) DeepCopyInto(out *FailedPods) {
*out = *in
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.MinPodLifetimeSeconds != nil {
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
*out = new(uint)
**out = **in
}
if in.Reasons != nil {
in, out := &in.Reasons, &out.Reasons
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
func (in *FailedPods) DeepCopy() *FailedPods {
if in == nil {
return nil
}
out := new(FailedPods)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Namespaces) DeepCopyInto(out *Namespaces) {
*out = *in
if in.Include != nil {
in, out := &in.Include, &out.Include
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.Exclude != nil {
in, out := &in.Exclude, &out.Exclude
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces.
func (in *Namespaces) DeepCopy() *Namespaces {
if in == nil {
return nil
}
out := new(Namespaces)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeResourceUtilizationThresholds) DeepCopyInto(out *NodeResourceUtilizationThresholds) {
*out = *in
@@ -107,6 +201,32 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) {
*out = *in
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.PodStatusPhases != nil {
in, out := &in.PodStatusPhases, &out.PodStatusPhases
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTime.
func (in *PodLifeTime) DeepCopy() *PodLifeTime {
if in == nil {
return nil
}
out := new(PodLifeTime)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
*out = *in
@@ -206,16 +326,36 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = new(PodsHavingTooManyRestarts)
**out = **in
}
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
if in.PodLifeTime != nil {
in, out := &in.PodLifeTime, &out.PodLifeTime
*out = new(PodLifeTime)
(*in).DeepCopyInto(*out)
}
if in.RemoveDuplicates != nil {
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
*out = new(RemoveDuplicates)
(*in).DeepCopyInto(*out)
}
if in.FailedPods != nil {
in, out := &in.FailedPods, &out.FailedPods
*out = new(FailedPods)
(*in).DeepCopyInto(*out)
}
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(Namespaces)
(*in).DeepCopyInto(*out)
}
if in.ThresholdPriority != nil {
in, out := &in.ThresholdPriority, &out.ThresholdPriority
*out = new(int32)
**out = **in
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
return
}

View File

@@ -19,8 +19,6 @@ package componentconfig
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
var (
@@ -34,12 +32,6 @@ const GroupName = "deschedulercomponentconfig"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
func init() {
if err := addKnownTypes(scheme.Scheme); err != nil {
panic(err)
}
}
// Kind takes an unqualified kind and returns a Group qualified GroupKind
func Kind(kind string) schema.GroupKind {
return SchemeGroupVersion.WithKind(kind).GroupKind()

View File

@@ -20,6 +20,7 @@ import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
componentbaseconfig "k8s.io/component-base/config"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -48,4 +49,11 @@ type DeschedulerConfiguration struct {
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods bool
// IgnorePVCPods sets whether PVC pods should be allowed to be evicted
IgnorePVCPods bool
// Logging specifies the options of logging.
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information.
Logging componentbaseconfig.LoggingConfiguration
}

View File

@@ -19,6 +19,8 @@ package v1alpha1
import (
"time"
componentbaseconfig "k8s.io/component-base/config"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)
@@ -48,4 +50,11 @@ type DeschedulerConfiguration struct {
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods bool `json:"evictLocalStoragePods,omitempty"`
// IgnorePVCPods sets whether PVC pods should be allowed to be evicted
IgnorePVCPods bool `json:"ignorePvcPods,omitempty"`
// Logging specifies the options of logging.
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information.
Logging componentbaseconfig.LoggingConfiguration `json:"logging,omitempty"`
}

View File

@@ -1,7 +1,8 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -56,6 +57,8 @@ func autoConvert_v1alpha1_DeschedulerConfiguration_To_componentconfig_Deschedule
out.NodeSelector = in.NodeSelector
out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode
out.EvictLocalStoragePods = in.EvictLocalStoragePods
out.IgnorePVCPods = in.IgnorePVCPods
out.Logging = in.Logging
return nil
}
@@ -72,6 +75,8 @@ func autoConvert_componentconfig_DeschedulerConfiguration_To_v1alpha1_Deschedule
out.NodeSelector = in.NodeSelector
out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode
out.EvictLocalStoragePods = in.EvictLocalStoragePods
out.IgnorePVCPods = in.IgnorePVCPods
out.Logging = in.Logging
return nil
}

View File

@@ -1,7 +1,8 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -28,6 +29,7 @@ import (
func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
in.Logging.DeepCopyInto(&out.Logging)
return
}

View File

@@ -1,7 +1,8 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -1,7 +1,8 @@
//go:build !ignore_autogenerated
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -28,6 +29,7 @@ import (
func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
in.Logging.DeepCopyInto(&out.Logging)
return
}

View File

@@ -20,23 +20,36 @@ import (
"context"
"fmt"
"k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
fakeclientset "k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"k8s.io/klog/v2"
corev1informers "k8s.io/client-go/informers/core/v1"
schedulingv1informers "k8s.io/client-go/informers/scheduling/v1"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/metrics"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/client"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/nodeutilization"
)
func Run(rs *options.DeschedulerServer) error {
ctx := context.Background()
func Run(ctx context.Context, rs *options.DeschedulerServer) error {
metrics.Register()
rsclient, err := client.CreateClient(rs.KubeconfigFile)
if err != nil {
return err
@@ -56,58 +69,227 @@ func Run(rs *options.DeschedulerServer) error {
return err
}
// tie in root ctx with our wait stopChannel
stopChannel := make(chan struct{})
go func() {
<-ctx.Done()
close(stopChannel)
}()
return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion, stopChannel)
}
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor)
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc)
func cachedClient(
realClient clientset.Interface,
podInformer corev1informers.PodInformer,
nodeInformer corev1informers.NodeInformer,
namespaceInformer corev1informers.NamespaceInformer,
priorityClassInformer schedulingv1informers.PriorityClassInformer,
) (clientset.Interface, error) {
fakeClient := fakeclientset.NewSimpleClientset()
// simulate a pod eviction by deleting a pod
fakeClient.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() == "eviction" {
createAct, matched := action.(core.CreateActionImpl)
if !matched {
return false, nil, fmt.Errorf("unable to convert action to core.CreateActionImpl")
}
eviction, matched := createAct.Object.(*policy.Eviction)
if !matched {
return false, nil, fmt.Errorf("unable to convert action object into *policy.Eviction")
}
if err := fakeClient.Tracker().Delete(action.GetResource(), eviction.GetNamespace(), eviction.GetName()); err != nil {
return false, nil, fmt.Errorf("unable to delete pod %v/%v: %v", eviction.GetNamespace(), eviction.GetName(), err)
}
return true, nil, nil
}
// fallback to the default reactor
return false, nil, nil
})
klog.V(3).Infof("Pulling resources for the cached client from the cluster")
pods, err := podInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list pods: %v", err)
}
for _, item := range pods {
if _, err := fakeClient.CoreV1().Pods(item.Namespace).Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy pod: %v", err)
}
}
nodes, err := nodeInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list nodes: %v", err)
}
for _, item := range nodes {
if _, err := fakeClient.CoreV1().Nodes().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy node: %v", err)
}
}
namespaces, err := namespaceInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list namespaces: %v", err)
}
for _, item := range namespaces {
if _, err := fakeClient.CoreV1().Namespaces().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy node: %v", err)
}
}
priorityClasses, err := priorityClassInformer.Lister().List(labels.Everything())
if err != nil {
return nil, fmt.Errorf("unable to list priorityclasses: %v", err)
}
for _, item := range priorityClasses {
if _, err := fakeClient.SchedulingV1().PriorityClasses().Create(context.TODO(), item, metav1.CreateOptions{}); err != nil {
return nil, fmt.Errorf("unable to copy priorityclass: %v", err)
}
}
return fakeClient, nil
}
func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, stopChannel chan struct{}) error {
sharedInformerFactory := informers.NewSharedInformerFactory(rs.Client, 0)
nodeInformer := sharedInformerFactory.Core().V1().Nodes()
podInformer := sharedInformerFactory.Core().V1().Pods()
namespaceInformer := sharedInformerFactory.Core().V1().Namespaces()
priorityClassInformer := sharedInformerFactory.Scheduling().V1().PriorityClasses()
// create the informers
namespaceInformer.Informer()
priorityClassInformer.Informer()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
return fmt.Errorf("build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(stopChannel)
sharedInformerFactory.WaitForCacheSync(stopChannel)
strategyFuncs := map[string]strategyFunction{
"RemoveDuplicates": strategies.RemoveDuplicatePods,
"LowNodeUtilization": strategies.LowNodeUtilization,
"RemovePodsViolatingInterPodAntiAffinity": strategies.RemovePodsViolatingInterPodAntiAffinity,
"RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity,
"RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints,
"RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts,
"PodLifeTime": strategies.PodLifeTime,
strategyFuncs := map[api.StrategyName]strategyFunction{
"RemoveDuplicates": strategies.RemoveDuplicatePods,
"LowNodeUtilization": nodeutilization.LowNodeUtilization,
"HighNodeUtilization": nodeutilization.HighNodeUtilization,
"RemovePodsViolatingInterPodAntiAffinity": strategies.RemovePodsViolatingInterPodAntiAffinity,
"RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity,
"RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints,
"RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts,
"PodLifeTime": strategies.PodLifeTime,
"RemovePodsViolatingTopologySpreadConstraint": strategies.RemovePodsViolatingTopologySpreadConstraint,
"RemoveFailedPods": strategies.RemoveFailedPods,
}
wait.Until(func() {
nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, rs.NodeSelector, stopChannel)
var nodeSelector string
if deschedulerPolicy.NodeSelector != nil {
nodeSelector = *deschedulerPolicy.NodeSelector
}
var evictLocalStoragePods bool
if deschedulerPolicy.EvictLocalStoragePods != nil {
evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods
}
evictBarePods := false
if deschedulerPolicy.EvictFailedBarePods != nil {
evictBarePods = *deschedulerPolicy.EvictFailedBarePods
if evictBarePods {
klog.V(1).InfoS("Warning: EvictFailedBarePods is set to True. This could cause eviction of pods without ownerReferences.")
}
}
evictSystemCriticalPods := false
if deschedulerPolicy.EvictSystemCriticalPods != nil {
evictSystemCriticalPods = *deschedulerPolicy.EvictSystemCriticalPods
if evictSystemCriticalPods {
klog.V(1).InfoS("Warning: EvictSystemCriticalPods is set to True. This could cause eviction of Kubernetes system pods.")
}
}
ignorePvcPods := false
if deschedulerPolicy.IgnorePVCPods != nil {
ignorePvcPods = *deschedulerPolicy.IgnorePVCPods
}
wait.NonSlidingUntil(func() {
nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, nodeSelector)
if err != nil {
klog.V(1).Infof("Unable to get ready nodes: %v", err)
klog.V(1).InfoS("Unable to get ready nodes", "err", err)
close(stopChannel)
return
}
if len(nodes) <= 1 {
klog.V(1).Infof("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
close(stopChannel)
return
}
var podEvictorClient clientset.Interface
// When the dry mode is enable, collect all the relevant objects (mostly pods) under a fake client.
// So when evicting pods while running multiple strategies in a row have the cummulative effect
// as is when evicting pods for real.
if rs.DryRun {
klog.V(3).Infof("Building a cached client from the cluster for the dry run")
// Create a new cache so we start from scratch without any leftovers
fakeClient, err := cachedClient(rs.Client, podInformer, nodeInformer, namespaceInformer, priorityClassInformer)
if err != nil {
klog.Error(err)
return
}
fakeSharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
getPodsAssignedToNode, err = podutil.BuildGetPodsAssignedToNodeFunc(fakeSharedInformerFactory.Core().V1().Pods())
if err != nil {
klog.Errorf("build get pods assigned to node function error: %v", err)
return
}
fakeCtx, cncl := context.WithCancel(context.TODO())
defer cncl()
fakeSharedInformerFactory.Start(fakeCtx.Done())
fakeSharedInformerFactory.WaitForCacheSync(fakeCtx.Done())
podEvictorClient = fakeClient
} else {
podEvictorClient = rs.Client
}
klog.V(3).Infof("Building a pod evictor")
podEvictor := evictions.NewPodEvictor(
rs.Client,
podEvictorClient,
evictionPolicyGroupVersion,
rs.DryRun,
rs.MaxNoOfPodsToEvictPerNode,
deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
nodes,
rs.EvictLocalStoragePods,
evictLocalStoragePods,
evictSystemCriticalPods,
ignorePvcPods,
evictBarePods,
!rs.DisableMetrics,
)
for name, f := range strategyFuncs {
if strategy := deschedulerPolicy.Strategies[api.StrategyName(name)]; strategy.Enabled {
f(ctx, rs.Client, strategy, nodes, podEvictor)
for name, strategy := range deschedulerPolicy.Strategies {
if f, ok := strategyFuncs[name]; ok {
if strategy.Enabled {
f(ctx, rs.Client, strategy, nodes, podEvictor, getPodsAssignedToNode)
}
} else {
klog.ErrorS(fmt.Errorf("unknown strategy name"), "skipping strategy", "strategy", name)
}
}
klog.V(1).InfoS("Number of evicted pods", "totalEvicted", podEvictor.TotalEvicted())
// If there was no interval specified, send a signal to the stopChannel to end the wait.Until loop after 1 iteration
if rs.DeschedulingInterval.Seconds() == 0 {
close(stopChannel)

View File

@@ -7,7 +7,7 @@ import (
"testing"
"time"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
fakeclientset "k8s.io/client-go/kubernetes/fake"
@@ -38,18 +38,27 @@ func TestTaintsUpdated(t *testing.T) {
stopChannel := make(chan struct{})
defer close(stopChannel)
rs := options.NewDeschedulerServer()
rs, err := options.NewDeschedulerServer()
if err != nil {
t.Fatalf("Unable to initialize server: %v", err)
}
rs.Client = client
rs.DeschedulingInterval = 100 * time.Millisecond
errChan := make(chan error, 1)
defer close(errChan)
go func() {
err := RunDeschedulerStrategies(ctx, rs, dp, "v1beta1", stopChannel)
errChan <- err
}()
select {
case err := <-errChan:
if err != nil {
t.Fatalf("Unable to run descheduler strategies: %v", err)
}
}()
case <-time.After(1 * time.Second):
// Wait for few cycles and then verify the only pod still exists
}
// Wait for few cycles and then verify the only pod still exists
time.Sleep(300 * time.Millisecond)
pods, err := client.CoreV1().Pods(p1.Namespace).List(ctx, metav1.ListOptions{})
if err != nil {
t.Errorf("Unable to list pods: %v", err)

View File

@@ -25,12 +25,15 @@ import (
policy "k8s.io/api/policy/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/errors"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
clientcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/metrics"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
@@ -42,80 +45,70 @@ const (
)
// nodePodEvictedCount keeps count of pods evicted on node
type nodePodEvictedCount map[*v1.Node]int
type nodePodEvictedCount map[*v1.Node]uint
type namespacePodEvictCount map[string]uint
type PodEvictor struct {
client clientset.Interface
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode int
nodepodCount nodePodEvictedCount
evictLocalStoragePods bool
client clientset.Interface
nodes []*v1.Node
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode *uint
maxPodsToEvictPerNamespace *uint
nodepodCount nodePodEvictedCount
namespacePodCount namespacePodEvictCount
evictFailedBarePods bool
evictLocalStoragePods bool
evictSystemCriticalPods bool
ignorePvcPods bool
metricsEnabled bool
}
func NewPodEvictor(
client clientset.Interface,
policyGroupVersion string,
dryRun bool,
maxPodsToEvictPerNode int,
maxPodsToEvictPerNode *uint,
maxPodsToEvictPerNamespace *uint,
nodes []*v1.Node,
evictLocalStoragePods bool,
evictSystemCriticalPods bool,
ignorePvcPods bool,
evictFailedBarePods bool,
metricsEnabled bool,
) *PodEvictor {
var nodePodCount = make(nodePodEvictedCount)
var namespacePodCount = make(namespacePodEvictCount)
for _, node := range nodes {
// Initialize podsEvicted till now with 0.
nodePodCount[node] = 0
}
return &PodEvictor{
client: client,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
nodepodCount: nodePodCount,
evictLocalStoragePods: evictLocalStoragePods,
client: client,
nodes: nodes,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
maxPodsToEvictPerNamespace: maxPodsToEvictPerNamespace,
nodepodCount: nodePodCount,
namespacePodCount: namespacePodCount,
evictLocalStoragePods: evictLocalStoragePods,
evictSystemCriticalPods: evictSystemCriticalPods,
evictFailedBarePods: evictFailedBarePods,
ignorePvcPods: ignorePvcPods,
metricsEnabled: metricsEnabled,
}
}
// IsEvictable checks if a pod is evictable or not.
func (pe *PodEvictor) IsEvictable(pod *v1.Pod) bool {
checkErrs := []error{}
if IsCriticalPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is critical"))
}
ownerRefList := podutil.OwnerRef(pod)
if IsDaemonsetPod(ownerRefList) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
}
if len(ownerRefList) == 0 {
checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs"))
}
if !pe.evictLocalStoragePods && IsPodWithLocalStorage(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod has local storage and descheduler is not configured with --evict-local-storage-pods"))
}
if IsMirrorPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
}
if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) {
klog.V(4).Infof("Pod %s in namespace %s is not evictable: Pod lacks an eviction annotation and fails the following checks: %v", pod.Name, pod.Namespace, errors.NewAggregate(checkErrs).Error())
return false
}
return true
}
// NodeEvicted gives a number of pods evicted for node
func (pe *PodEvictor) NodeEvicted(node *v1.Node) int {
func (pe *PodEvictor) NodeEvicted(node *v1.Node) uint {
return pe.nodepodCount[node]
}
// TotalEvicted gives a number of pods evicted through all nodes
func (pe *PodEvictor) TotalEvicted() int {
var total int
func (pe *PodEvictor) TotalEvicted() uint {
var total uint
for _, count := range pe.nodepodCount {
total += count
}
@@ -125,29 +118,48 @@ func (pe *PodEvictor) TotalEvicted() int {
// EvictPod returns non-nil error only when evicting a pod on a node is not
// possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod
// is evicted on the server side.
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node, reasons ...string) (bool, error) {
var reason string
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node, strategy string, reasons ...string) (bool, error) {
reason := strategy
if len(reasons) > 0 {
reason = " (" + strings.Join(reasons, ", ") + ")"
reason += " (" + strings.Join(reasons, ", ") + ")"
}
if pe.maxPodsToEvictPerNode > 0 && pe.nodepodCount[node]+1 > pe.maxPodsToEvictPerNode {
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", pe.maxPodsToEvictPerNode, node.Name)
if pe.maxPodsToEvictPerNode != nil && pe.nodepodCount[node]+1 > *pe.maxPodsToEvictPerNode {
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per node reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", *pe.maxPodsToEvictPerNode, node.Name)
}
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion, pe.dryRun)
if pe.maxPodsToEvictPerNamespace != nil && pe.namespacePodCount[pod.Namespace]+1 > *pe.maxPodsToEvictPerNamespace {
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number of pods per namespace reached", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, fmt.Errorf("Maximum number %v of evicted pods per %q namespace reached", *pe.maxPodsToEvictPerNamespace, pod.Namespace)
}
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion)
if err != nil {
// err is used only for logging purposes
klog.Errorf("Error evicting pod: %#v in namespace %#v%s: %#v", pod.Name, pod.Namespace, reason, err)
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", reason)
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
return false, nil
}
pe.nodepodCount[node]++
pe.namespacePodCount[pod.Namespace]++
if pe.metricsEnabled {
metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace, "node": node.Name}).Inc()
}
if pe.dryRun {
klog.V(1).Infof("Evicted pod in dry run mode: %#v in namespace %#v%s", pod.Name, pod.Namespace, reason)
klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", reason)
} else {
klog.V(1).Infof("Evicted pod: %#v in namespace %#v%s", pod.Name, pod.Namespace, reason)
klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", reason)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(klog.V(3).Infof)
eventBroadcaster.StartStructuredLogging(3)
eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: pe.client.CoreV1().Events(pod.Namespace)})
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason))
@@ -155,10 +167,7 @@ func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node,
return true, nil
}
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string, dryRun bool) error {
if dryRun {
return nil
}
func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, policyGroupVersion string) error {
deleteOptions := &metav1.DeleteOptions{}
// GracePeriodSeconds ?
eviction := &policy.Eviction{
@@ -183,22 +192,161 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
return err
}
func IsCriticalPod(pod *v1.Pod) bool {
return utils.IsCriticalPod(pod)
type Options struct {
priority *int32
nodeFit bool
labelSelector labels.Selector
}
func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool {
for _, ownerRef := range ownerRefList {
if ownerRef.Kind == "DaemonSet" {
return true
// WithPriorityThreshold sets a threshold for pod's priority class.
// Any pod whose priority class is lower is evictable.
func WithPriorityThreshold(priority int32) func(opts *Options) {
return func(opts *Options) {
var p int32 = priority
opts.priority = &p
}
}
// WithNodeFit sets whether or not to consider taints, node selectors,
// and pod affinity when evicting. A pod whose tolerations, node selectors,
// and affinity match a node other than the one it is currently running on
// is evictable.
func WithNodeFit(nodeFit bool) func(opts *Options) {
return func(opts *Options) {
opts.nodeFit = nodeFit
}
}
// WithLabelSelector sets whether or not to apply label filtering when evicting.
// Any pod matching the label selector is considered evictable.
func WithLabelSelector(labelSelector labels.Selector) func(opts *Options) {
return func(opts *Options) {
opts.labelSelector = labelSelector
}
}
type constraint func(pod *v1.Pod) error
type evictable struct {
constraints []constraint
}
// Evictable provides an implementation of IsEvictable(IsEvictable(pod *v1.Pod) bool).
// The method accepts a list of options which allow to extend constraints
// which decides when a pod is considered evictable.
func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
options := &Options{}
for _, opt := range opts {
opt(options)
}
ev := &evictable{}
if pe.evictFailedBarePods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
ownerRefList := podutil.OwnerRef(pod)
// Enable evictFailedBarePods to evict bare pods in failed phase
if len(ownerRefList) == 0 && pod.Status.Phase != v1.PodFailed {
return fmt.Errorf("pod does not have any ownerRefs and is not in failed phase")
}
return nil
})
} else {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
ownerRefList := podutil.OwnerRef(pod)
// Moved from IsEvictable function for backward compatibility
if len(ownerRefList) == 0 {
return fmt.Errorf("pod does not have any ownerRefs")
}
return nil
})
}
if !pe.evictSystemCriticalPods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
// Moved from IsEvictable function to allow for disabling
if utils.IsCriticalPriorityPod(pod) {
return fmt.Errorf("pod has system critical priority")
}
return nil
})
if options.priority != nil {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if IsPodEvictableBasedOnPriority(pod, *options.priority) {
return nil
}
return fmt.Errorf("pod has higher priority than specified priority class threshold")
})
}
}
return false
if !pe.evictLocalStoragePods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if utils.IsPodWithLocalStorage(pod) {
return fmt.Errorf("pod has local storage and descheduler is not configured with evictLocalStoragePods")
}
return nil
})
}
if pe.ignorePvcPods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if utils.IsPodWithPVC(pod) {
return fmt.Errorf("pod has a PVC and descheduler is configured to ignore PVC pods")
}
return nil
})
}
if options.nodeFit {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if !nodeutil.PodFitsAnyOtherNode(pod, pe.nodes) {
return fmt.Errorf("pod does not fit on any other node because of nodeSelector(s), Taint(s), or nodes marked as unschedulable")
}
return nil
})
}
if options.labelSelector != nil && !options.labelSelector.Empty() {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if !options.labelSelector.Matches(labels.Set(pod.Labels)) {
return fmt.Errorf("pod labels do not match the labelSelector filter in the policy parameter")
}
return nil
})
}
return ev
}
// IsMirrorPod checks whether the pod is a mirror pod.
func IsMirrorPod(pod *v1.Pod) bool {
return utils.IsMirrorPod(pod)
// IsEvictable decides when a pod is evictable
func (ev *evictable) IsEvictable(pod *v1.Pod) bool {
checkErrs := []error{}
ownerRefList := podutil.OwnerRef(pod)
if utils.IsDaemonsetPod(ownerRefList) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
}
if utils.IsMirrorPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
}
if utils.IsStaticPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a static pod"))
}
if utils.IsPodTerminating(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is terminating"))
}
for _, c := range ev.constraints {
if err := c(pod); err != nil {
checkErrs = append(checkErrs, err)
}
}
if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) {
klog.V(4).InfoS("Pod lacks an eviction annotation and fails the following checks", "pod", klog.KObj(pod), "checks", errors.NewAggregate(checkErrs).Error())
return false
}
return true
}
// HaveEvictAnnotation checks if the pod have evict annotation
@@ -207,12 +355,7 @@ func HaveEvictAnnotation(pod *v1.Pod) bool {
return found
}
func IsPodWithLocalStorage(pod *v1.Pod) bool {
for _, volume := range pod.Spec.Volumes {
if volume.HostPath != nil || volume.EmptyDir != nil {
return true
}
}
return false
// IsPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec.
func IsPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool {
return pod.Spec.Priority == nil || *pod.Spec.Priority < priority
}

View File

@@ -62,7 +62,7 @@ func TestEvictPod(t *testing.T) {
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: test.pods}, nil
})
got := evictPod(ctx, fakeClient, test.pod, "v1", false)
got := evictPod(ctx, fakeClient, test.pod, "v1")
if got != test.want {
t.Errorf("Test error for Desc: %s. Expected %v pod eviction to be %v, got %v", test.description, test.pod.Name, test.want, got)
}
@@ -71,47 +71,101 @@ func TestEvictPod(t *testing.T) {
func TestIsEvictable(t *testing.T) {
n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil)
lowPriority := int32(800)
highPriority := int32(900)
nodeTaintKey := "hardware"
nodeTaintValue := "gpu"
nodeLabelKey := "datacenter"
nodeLabelValue := "east"
type testCase struct {
pod *v1.Pod
runBefore func(*v1.Pod)
evictLocalStoragePods bool
result bool
pod *v1.Pod
nodes []*v1.Node
runBefore func(*v1.Pod, []*v1.Node)
evictFailedBarePods bool
evictLocalStoragePods bool
evictSystemCriticalPods bool
priorityThreshold *int32
nodeFit bool
result bool
}
testCases := []testCase{
{
{ // Failed pod eviction with no ownerRefs.
pod: test.BuildTestPod("bare_pod_failed", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Status.Phase = v1.PodFailed
},
evictFailedBarePods: false,
result: false,
}, { // Normal pod eviction with no ownerRefs and evictFailedBarePods enabled
pod: test.BuildTestPod("bare_pod", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
},
evictFailedBarePods: true,
result: false,
}, { // Failed pod eviction with no ownerRefs
pod: test.BuildTestPod("bare_pod_failed_but_can_be_evicted", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Status.Phase = v1.PodFailed
},
evictFailedBarePods: true,
result: true,
}, { // Normal pod eviction with normal ownerRefs
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with replicaSet ownerRefs
pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with statefulSet ownerRefs
pod: test.BuildTestPod("p18", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p19", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted because it is bound to a PV and evictLocalStoragePods = false
pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
@@ -124,11 +178,12 @@ func TestIsEvictable(t *testing.T) {
},
}
},
evictLocalStoragePods: false,
result: false,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = true
pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
@@ -141,11 +196,12 @@ func TestIsEvictable(t *testing.T) {
},
}
},
evictLocalStoragePods: true,
result: true,
}, {
evictLocalStoragePods: true,
evictSystemCriticalPods: false,
result: true,
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
@@ -159,66 +215,244 @@ func TestIsEvictable(t *testing.T) {
},
}
},
evictLocalStoragePods: false,
result: true,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted becasuse it is part of a daemonSet
pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
},
evictLocalStoragePods: false,
result: false,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted becasuse it is a mirror pod
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
},
evictLocalStoragePods: false,
result: false,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
},
evictLocalStoragePods: false,
result: true,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted becasuse it has system critical priority
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
evictLocalStoragePods: false,
result: false,
}, {
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/evict": "true",
}
},
evictLocalStoragePods: false,
result: true,
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted becasuse it has a priority higher than the configured priority threshold
pod: test.BuildTestPod("p14", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
priorityThreshold: &lowPriority,
result: false,
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p15", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
result: true,
}, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
result: true,
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod with no tolerations running on normal node, all other nodes tainted
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, { // Pod with correct tolerations running on normal node, all other nodes tainted
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.Tolerations = []v1.Toleration{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
}, { // Pod with incorrect node selector
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: "fail",
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, { // Pod with correct node selector
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
},
}
for _, test := range testCases {
test.runBefore(test.pod)
test.runBefore(test.pod, test.nodes)
nodes := append(test.nodes, n1)
podEvictor := &PodEvictor{
evictLocalStoragePods: test.evictLocalStoragePods,
evictLocalStoragePods: test.evictLocalStoragePods,
evictSystemCriticalPods: test.evictSystemCriticalPods,
evictFailedBarePods: test.evictFailedBarePods,
nodes: nodes,
}
result := podEvictor.IsEvictable(test.pod)
evictable := podEvictor.Evictable()
var opts []func(opts *Options)
if test.priorityThreshold != nil {
opts = append(opts, WithPriorityThreshold(*test.priorityThreshold))
}
if test.nodeFit {
opts = append(opts, WithNodeFit(true))
}
evictable = podEvictor.Evictable(opts...)
result := evictable.IsEvictable(test.pod)
if result != test.result {
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
}
@@ -233,10 +467,6 @@ func TestPodTypes(t *testing.T) {
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil)
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil)
p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil)
p5 := test.BuildTestPod("p5", 400, 0, n1.Name, nil)
p6 := test.BuildTestPod("p6", 400, 0, n1.Name, nil)
p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
// The following 4 pods won't get evicted.
@@ -257,27 +487,18 @@ func TestPodTypes(t *testing.T) {
}
// A Mirror Pod.
p4.Annotations = test.GetMirrorPodAnnotation()
// A Critical Pod.
p5.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
p5.Spec.Priority = &priority
systemCriticalPriority := utils.SystemCriticalPriority
p5.Spec.Priority = &systemCriticalPriority
if !IsMirrorPod(p4) {
if !utils.IsMirrorPod(p4) {
t.Errorf("Expected p4 to be a mirror pod.")
}
if !IsCriticalPod(p5) {
t.Errorf("Expected p5 to be a critical pod.")
}
if !IsPodWithLocalStorage(p3) {
if !utils.IsPodWithLocalStorage(p3) {
t.Errorf("Expected p3 to be a pod with local storage.")
}
ownerRefList := podutil.OwnerRef(p2)
if !IsDaemonsetPod(ownerRefList) {
if !utils.IsDaemonsetPod(ownerRefList) {
t.Errorf("Expected p2 to be a daemonset pod.")
}
ownerRefList = podutil.OwnerRef(p1)
if IsDaemonsetPod(ownerRefList) || IsPodWithLocalStorage(p1) || IsCriticalPod(p1) || IsMirrorPod(p1) {
if utils.IsDaemonsetPod(ownerRefList) || utils.IsPodWithLocalStorage(p1) || utils.IsCriticalPriorityPod(p1) || utils.IsMirrorPod(p1) || utils.IsStaticPod(p1) {
t.Errorf("Expected p1 to be a normal pod.")
}

View File

@@ -18,7 +18,8 @@ package node
import (
"context"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
coreinformers "k8s.io/client-go/informers/core/v1"
@@ -29,7 +30,7 @@ import (
// ReadyNodes returns ready nodes irrespective of whether they are
// schedulable or not.
func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeSelector string, stopChannel <-chan struct{}) ([]*v1.Node, error) {
func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeSelector string) ([]*v1.Node, error) {
ns, err := labels.Parse(nodeSelector)
if err != nil {
return []*v1.Node{}, err
@@ -42,7 +43,7 @@ func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer co
}
if len(nodes) == 0 {
klog.V(2).Infof("node lister returned empty list, now fetch directly")
klog.V(2).InfoS("Node lister returned empty list, now fetch directly")
nItems, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: nodeSelector})
if err != nil {
@@ -77,25 +78,56 @@ func IsReady(node *v1.Node) bool {
// - NodeOutOfDisk condition status is ConditionFalse,
// - NodeNetworkUnavailable condition status is ConditionFalse.
if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
klog.V(1).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
klog.V(1).InfoS("Ignoring node", "node", klog.KObj(node), "condition", cond.Type, "status", cond.Status)
return false
} /*else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
klog.V(4).InfoS("Ignoring node with condition status", "node", klog.KObj(node.Name), "condition", cond.Type, "status", cond.Status)
return false
} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
klog.V(4).InfoS("Ignoring node with condition status", "node", klog.KObj(node.Name), "condition", cond.Type, "status", cond.Status)
return false
}*/
}
// Ignore nodes that are marked unschedulable
/*if node.Spec.Unschedulable {
klog.V(4).Infof("Ignoring node %v since it is unschedulable", node.Name)
klog.V(4).InfoS("Ignoring node since it is unschedulable", "node", klog.KObj(node.Name))
return false
}*/
return true
}
// IsNodeUnschedulable checks if the node is unschedulable. This is helper function to check only in case of
// PodFitsAnyOtherNode checks if the given pod fits any of the given nodes, besides the node
// the pod is already running on. The node fit is based on multiple criteria, like, pod node selector
// matching the node label (including affinity), the taints on the node, and the node being schedulable or not.
func PodFitsAnyOtherNode(pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
// Skip node pod is already on
if node.Name == pod.Spec.NodeName {
continue
}
// Check node selector and required affinity
ok, err := utils.PodMatchNodeSelector(pod, node)
if err != nil || !ok {
continue
}
// Check taints (we only care about NoSchedule and NoExecute taints)
ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
})
if !ok {
continue
}
// Check if node is schedulable
if !IsNodeUnschedulable(node) {
klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
}
}
return false
}
// IsNodeUnschedulable checks if the node is unschedulable. This is a helper function to check only in case of
// underutilized node so that they won't be accounted for.
func IsNodeUnschedulable(node *v1.Node) bool {
return node.Spec.Unschedulable
@@ -112,7 +144,7 @@ func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool {
continue
}
if !IsNodeUnschedulable(node) {
klog.V(2).Infof("Pod %v can possibly be scheduled on %v", pod.Name, node.Name)
klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
}
}
@@ -125,15 +157,15 @@ func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool {
ok, err := utils.PodMatchNodeSelector(pod, node)
if err != nil {
klog.Error(err)
klog.ErrorS(err, "Failed to match node selector")
return false
}
if !ok {
klog.V(2).Infof("Pod %v does not fit on node %v", pod.Name, node.Name)
klog.V(2).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return false
}
klog.V(2).Infof("Pod %v fits on node %v", pod.Name, node.Name)
klog.V(2).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
}

View File

@@ -20,7 +20,7 @@ import (
"context"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
@@ -69,12 +69,12 @@ func TestReadyNodesWithNodeSelector(t *testing.T) {
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
nodeInformer := sharedInformerFactory.Core().V1().Nodes()
stopChannel := make(chan struct{}, 0)
stopChannel := make(chan struct{})
sharedInformerFactory.Start(stopChannel)
sharedInformerFactory.WaitForCacheSync(stopChannel)
defer close(stopChannel)
nodes, _ := ReadyNodes(ctx, fakeClient, nodeInformer, nodeSelector, nil)
nodes, _ := ReadyNodes(ctx, fakeClient, nodeInformer, nodeSelector)
if nodes[0].Name != "node1" {
t.Errorf("Expected node1, got %s", nodes[0].Name)
@@ -200,10 +200,15 @@ func TestPodFitsCurrentNode(t *testing.T) {
}
}
func TestPodFitsAnyNode(t *testing.T) {
func TestPodFitsAnyOtherNode(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
nodeTaintKey := "hardware"
nodeTaintValue := "gpu"
// Staging node has no scheduling restrictions, but the pod always starts here and PodFitsAnyOtherNode() doesn't take into account the node the pod is running on.
nodeNames := []string{"node1", "node2", "stagingNode"}
tests := []struct {
description string
@@ -212,33 +217,12 @@ func TestPodFitsAnyNode(t *testing.T) {
success bool
}{
{
description: "Pod expected to fit one of the nodes",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
description: "Pod fits another node matching node affinity",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
@@ -246,42 +230,27 @@ func TestPodFitsAnyNode(t *testing.T) {
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: true,
},
{
description: "Pod expected to fit one of the nodes (error node first)",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
description: "Pod expected to fit one of the nodes",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: "no",
},
@@ -289,42 +258,27 @@ func TestPodFitsAnyNode(t *testing.T) {
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: true,
},
{
description: "Pod expected to fit none of the nodes",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: "unfit1",
},
@@ -332,42 +286,27 @@ func TestPodFitsAnyNode(t *testing.T) {
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "unfit2",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: false,
},
{
description: "Nodes are unschedulable but labels match, should fail",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
@@ -378,20 +317,136 @@ func TestPodFitsAnyNode(t *testing.T) {
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, should pass",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: true,
},
{
description: "Both nodes are tained, should fail",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoExecute,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: false,
},
}
for _, tc := range tests {
actual := PodFitsAnyNode(tc.pod, tc.nodes)
actual := PodFitsAnyOtherNode(tc.pod, tc.nodes)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
}
}
func createPodManifest(nodeName string, nodeSelectorKey string, nodeSelectorValue string) *v1.Pod {
return (&v1.Pod{
Spec: v1.PodSpec{
NodeName: nodeName,
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeSelectorKey,
Operator: "In",
Values: []string{
nodeSelectorValue,
},
},
},
},
},
},
},
},
},
})
}

View File

@@ -17,39 +17,174 @@ limitations under the License.
package pod
import (
"context"
"sort"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
coreinformers "k8s.io/client-go/informers/core/v1"
"k8s.io/client-go/tools/cache"
"sigs.k8s.io/descheduler/pkg/utils"
"sort"
)
// ListPodsOnANode lists all of the pods on a node
// It also accepts an optional "filter" function which can be used to further limit the pods that are returned.
// (Usually this is podEvictor.IsEvictable, in order to only list the evictable pods on a node, but can
// be used by strategies to extend IsEvictable if there are further restrictions, such as with NodeAffinity).
// The filter function should return true if the pod should be returned from ListPodsOnANode
func ListPodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node, filter func(pod *v1.Pod) bool) ([]*v1.Pod, error) {
fieldSelector, err := fields.ParseSelector("spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed))
if err != nil {
return []*v1.Pod{}, err
}
const (
nodeNameKeyIndex = "spec.nodeName"
)
podList, err := client.CoreV1().Pods(v1.NamespaceAll).List(ctx,
metav1.ListOptions{FieldSelector: fieldSelector.String()})
if err != nil {
return []*v1.Pod{}, err
}
// FilterFunc is a filter for a pod.
type FilterFunc func(*v1.Pod) bool
pods := make([]*v1.Pod, 0)
for i := range podList.Items {
if filter != nil && !filter(&podList.Items[i]) {
continue
// GetPodsAssignedToNodeFunc is a function which accept a node name and a pod filter function
// as input and returns the pods that assigned to the node.
type GetPodsAssignedToNodeFunc func(string, FilterFunc) ([]*v1.Pod, error)
// WrapFilterFuncs wraps a set of FilterFunc in one.
func WrapFilterFuncs(filters ...FilterFunc) FilterFunc {
return func(pod *v1.Pod) bool {
for _, filter := range filters {
if filter != nil && !filter(pod) {
return false
}
}
pods = append(pods, &podList.Items[i])
return true
}
}
type Options struct {
filter FilterFunc
includedNamespaces sets.String
excludedNamespaces sets.String
labelSelector *metav1.LabelSelector
}
// NewOptions returns an empty Options.
func NewOptions() *Options {
return &Options{}
}
// WithFilter sets a pod filter.
// The filter function should return true if the pod should be returned from ListPodsOnANode
func (o *Options) WithFilter(filter FilterFunc) *Options {
o.filter = filter
return o
}
// WithNamespaces sets included namespaces
func (o *Options) WithNamespaces(namespaces sets.String) *Options {
o.includedNamespaces = namespaces
return o
}
// WithoutNamespaces sets excluded namespaces
func (o *Options) WithoutNamespaces(namespaces sets.String) *Options {
o.excludedNamespaces = namespaces
return o
}
// WithLabelSelector sets a pod label selector
func (o *Options) WithLabelSelector(labelSelector *metav1.LabelSelector) *Options {
o.labelSelector = labelSelector
return o
}
// BuildFilterFunc builds a final FilterFunc based on Options.
func (o *Options) BuildFilterFunc() (FilterFunc, error) {
var s labels.Selector
var err error
if o.labelSelector != nil {
s, err = metav1.LabelSelectorAsSelector(o.labelSelector)
if err != nil {
return nil, err
}
}
return func(pod *v1.Pod) bool {
if o.filter != nil && !o.filter(pod) {
return false
}
if len(o.includedNamespaces) > 0 && !o.includedNamespaces.Has(pod.Namespace) {
return false
}
if len(o.excludedNamespaces) > 0 && o.excludedNamespaces.Has(pod.Namespace) {
return false
}
if s != nil && !s.Matches(labels.Set(pod.GetLabels())) {
return false
}
return true
}, nil
}
// BuildGetPodsAssignedToNodeFunc establishes an indexer to map the pods and their assigned nodes.
// It returns a function to help us get all the pods that assigned to a node based on the indexer.
func BuildGetPodsAssignedToNodeFunc(podInformer coreinformers.PodInformer) (GetPodsAssignedToNodeFunc, error) {
// Establish an indexer to map the pods and their assigned nodes.
err := podInformer.Informer().AddIndexers(cache.Indexers{
nodeNameKeyIndex: func(obj interface{}) ([]string, error) {
pod, ok := obj.(*v1.Pod)
if !ok {
return []string{}, nil
}
if len(pod.Spec.NodeName) == 0 {
return []string{}, nil
}
return []string{pod.Spec.NodeName}, nil
},
})
if err != nil {
return nil, err
}
// The indexer helps us get all the pods that assigned to a node.
podIndexer := podInformer.Informer().GetIndexer()
getPodsAssignedToNode := func(nodeName string, filter FilterFunc) ([]*v1.Pod, error) {
objs, err := podIndexer.ByIndex(nodeNameKeyIndex, nodeName)
if err != nil {
return nil, err
}
pods := make([]*v1.Pod, 0, len(objs))
for _, obj := range objs {
pod, ok := obj.(*v1.Pod)
if !ok {
continue
}
if filter(pod) {
pods = append(pods, pod)
}
}
return pods, nil
}
return getPodsAssignedToNode, nil
}
// ListPodsOnANode lists all pods on a node.
// It also accepts a "filter" function which can be used to further limit the pods that are returned.
// (Usually this is podEvictor.Evictable().IsEvictable, in order to only list the evictable pods on a node, but can
// be used by strategies to extend it if there are further restrictions, such as with NodeAffinity).
func ListPodsOnANode(
nodeName string,
getPodsAssignedToNode GetPodsAssignedToNodeFunc,
filter FilterFunc,
) ([]*v1.Pod, error) {
// Succeeded and failed pods are not considered because they don't occupy any resource.
f := func(pod *v1.Pod) bool {
return pod.Status.Phase != v1.PodSucceeded && pod.Status.Phase != v1.PodFailed
}
return ListAllPodsOnANode(nodeName, getPodsAssignedToNode, WrapFilterFuncs(f, filter))
}
// ListAllPodsOnANode lists all the pods on a node no matter what the phase of the pod is.
func ListAllPodsOnANode(
nodeName string,
getPodsAssignedToNode GetPodsAssignedToNodeFunc,
filter FilterFunc,
) ([]*v1.Pod, error) {
pods, err := getPodsAssignedToNode(nodeName, filter)
if err != nil {
return []*v1.Pod{}, err
}
return pods, nil
}

View File

@@ -18,15 +18,15 @@ package pod
import (
"context"
"fmt"
"reflect"
"strings"
"testing"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/test"
)
@@ -38,39 +38,80 @@ var (
func TestListPodsOnANode(t *testing.T) {
testCases := []struct {
name string
pods map[string][]v1.Pod
pods []*v1.Pod
node *v1.Node
labelSelector *metav1.LabelSelector
expectedPodCount int
}{
{
name: "test listing pods on a node",
pods: map[string][]v1.Pod{
"n1": {
*test.BuildTestPod("pod1", 100, 0, "n1", nil),
*test.BuildTestPod("pod2", 100, 0, "n1", nil),
},
"n2": {*test.BuildTestPod("pod3", 100, 0, "n2", nil)},
pods: []*v1.Pod{
test.BuildTestPod("pod1", 100, 0, "n1", nil),
test.BuildTestPod("pod2", 100, 0, "n1", nil),
test.BuildTestPod("pod3", 100, 0, "n2", nil),
},
node: test.BuildTestNode("n1", 2000, 3000, 10, nil),
labelSelector: nil,
expectedPodCount: 2,
},
{
name: "test listing pods with label selector",
pods: []*v1.Pod{
test.BuildTestPod("pod1", 100, 0, "n1", nil),
test.BuildTestPod("pod2", 100, 0, "n1", func(pod *v1.Pod) {
pod.Labels = map[string]string{"foo": "bar"}
}),
test.BuildTestPod("pod3", 100, 0, "n1", func(pod *v1.Pod) {
pod.Labels = map[string]string{"foo": "bar1"}
}),
test.BuildTestPod("pod4", 100, 0, "n2", nil),
},
node: test.BuildTestNode("n1", 2000, 3000, 10, nil),
labelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "bar1"},
},
},
},
expectedPodCount: 2,
},
}
for _, testCase := range testCases {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldString := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldString, "n1") {
return true, &v1.PodList{Items: testCase.pods["n1"]}, nil
} else if strings.Contains(fieldString, "n2") {
return true, &v1.PodList{Items: testCase.pods["n2"]}, nil
t.Run(testCase.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
objs = append(objs, testCase.node)
for _, pod := range testCase.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
filter, err := NewOptions().WithLabelSelector(testCase.labelSelector).BuildFilterFunc()
if err != nil {
t.Errorf("Build filter function error: %v", err)
}
pods, _ := ListPodsOnANode(testCase.node.Name, getPodsAssignedToNode, filter)
if len(pods) != testCase.expectedPodCount {
t.Errorf("Expected %v pods on node %v, got %+v", testCase.expectedPodCount, testCase.node.Name, len(pods))
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
pods, _ := ListPodsOnANode(context.TODO(), fakeClient, testCase.node, nil)
if len(pods) != testCase.expectedPodCount {
t.Errorf("expected %v pods on node %v, got %+v", testCase.expectedPodCount, testCase.node.Name, len(pods))
}
}
}

View File

@@ -30,7 +30,7 @@ import (
func LoadPolicyConfig(policyConfigFile string) (*api.DeschedulerPolicy, error) {
if policyConfigFile == "" {
klog.V(1).Infof("policy config file not specified")
klog.V(1).InfoS("Policy config file not specified")
return nil, nil
}

View File

@@ -19,9 +19,22 @@ package scheme
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/api/v1alpha1"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
componentconfigv1alpha1 "sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
)
var (
Scheme = runtime.NewScheme()
Codecs = serializer.NewCodecFactory(Scheme)
)
func init() {
utilruntime.Must(api.AddToScheme(Scheme))
utilruntime.Must(v1alpha1.AddToScheme(Scheme))
utilruntime.Must(componentconfig.AddToScheme(Scheme))
utilruntime.Must(componentconfigv1alpha1.AddToScheme(Scheme))
}

View File

@@ -18,6 +18,8 @@ package strategies
import (
"context"
"fmt"
"math"
"reflect"
"sort"
"strings"
@@ -31,8 +33,29 @@ import (
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func validateRemoveDuplicatePodsParams(params *api.StrategyParameters) error {
if params == nil {
return nil
}
// At most one of include/exclude can be set
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
type podOwner struct {
namespace, kind, name string
imagesHash string
}
// RemoveDuplicatePods removes the duplicate pods on node. This strategy evicts all duplicate pods on node.
// A pod is said to be a duplicate of other if both of them are from same creator, kind and are within the same
// namespace, and have at least one container with the same image.
@@ -43,16 +66,55 @@ func RemoveDuplicatePods(
strategy api.DeschedulerStrategy,
nodes []*v1.Node,
podEvictor *evictions.PodEvictor,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
) {
if err := validateRemoveDuplicatePodsParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid RemoveDuplicatePods parameters")
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
var includedNamespaces, excludedNamespaces sets.String
if strategy.Params != nil && strategy.Params.Namespaces != nil {
includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
duplicatePods := make(map[podOwner]map[string][]*v1.Pod)
ownerKeyOccurence := make(map[podOwner]int32)
nodeCount := 0
nodeMap := make(map[string]*v1.Node)
podFilter, err := podutil.NewOptions().
WithFilter(evictable.IsEvictable).
WithNamespaces(includedNamespaces).
WithoutNamespaces(excludedNamespaces).
BuildFilterFunc()
if err != nil {
klog.ErrorS(err, "Error initializing pod filter function")
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v", node.Name)
pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable)
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
if err != nil {
klog.Errorf("error listing evictable pods on node %s: %+v", node.Name, err)
klog.ErrorS(err, "Error listing evictable pods on node", "node", klog.KObj(node))
continue
}
duplicatePods := make([]*v1.Pod, 0, len(pods))
nodeMap[node.Name] = node
nodeCount++
// Each pod has a list of owners and a list of containers, and each container has 1 image spec.
// For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string.
// All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness.
@@ -69,16 +131,29 @@ func RemoveDuplicatePods(
duplicateKeysMap := map[string][][]string{}
for _, pod := range pods {
ownerRefList := podutil.OwnerRef(pod)
if hasExcludedOwnerRefKind(ownerRefList, strategy) {
if hasExcludedOwnerRefKind(ownerRefList, strategy) || len(ownerRefList) == 0 {
continue
}
podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
imageList := []string{}
for _, container := range pod.Spec.Containers {
imageList = append(imageList, container.Image)
}
sort.Strings(imageList)
imagesHash := strings.Join(imageList, "#")
for _, ownerRef := range ownerRefList {
for _, container := range pod.Spec.Containers {
ownerKey := podOwner{
namespace: pod.ObjectMeta.Namespace,
kind: ownerRef.Kind,
name: ownerRef.Name,
imagesHash: imagesHash,
}
ownerKeyOccurence[ownerKey] = ownerKeyOccurence[ownerKey] + 1
for _, image := range imageList {
// Namespace/Kind/Name should be unique for the cluster.
// We also consider the image, as 2 pods could have the same owner but serve different purposes
// So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod.
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/")
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, image}, "/")
podContainerKeys = append(podContainerKeys, s)
}
}
@@ -90,7 +165,19 @@ func RemoveDuplicatePods(
for _, keys := range existing {
if reflect.DeepEqual(keys, podContainerKeys) {
matched = true
duplicatePods = append(duplicatePods, pod)
klog.V(3).InfoS("Duplicate found", "pod", klog.KObj(pod))
for _, ownerRef := range ownerRefList {
ownerKey := podOwner{
namespace: pod.ObjectMeta.Namespace,
kind: ownerRef.Kind,
name: ownerRef.Name,
imagesHash: imagesHash,
}
if _, ok := duplicatePods[ownerKey]; !ok {
duplicatePods[ownerKey] = make(map[string][]*v1.Pod)
}
duplicatePods[ownerKey][node.Name] = append(duplicatePods[ownerKey][node.Name], pod)
}
break
}
}
@@ -103,16 +190,103 @@ func RemoveDuplicatePods(
duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys}
}
}
}
for _, pod := range duplicatePods {
if _, err := podEvictor.EvictPod(ctx, pod, node, "RemoveDuplicatePods"); err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
break
// 1. how many pods can be evicted to respect uniform placement of pods among viable nodes?
for ownerKey, podNodes := range duplicatePods {
targetNodes := getTargetNodes(podNodes, nodes)
klog.V(2).InfoS("Adjusting feasible nodes", "owner", ownerKey, "from", nodeCount, "to", len(targetNodes))
if len(targetNodes) < 2 {
klog.V(1).InfoS("Less than two feasible nodes for duplicates to land, skipping eviction", "owner", ownerKey)
continue
}
upperAvg := int(math.Ceil(float64(ownerKeyOccurence[ownerKey]) / float64(len(targetNodes))))
for nodeName, pods := range podNodes {
klog.V(2).InfoS("Average occurrence per node", "node", klog.KObj(nodeMap[nodeName]), "ownerKey", ownerKey, "avg", upperAvg)
// list of duplicated pods does not contain the original referential pod
if len(pods)+1 > upperAvg {
// It's assumed all duplicated pods are in the same priority class
// TODO(jchaloup): check if the pod has a different node to lend to
for _, pod := range pods[upperAvg-1:] {
if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName], "RemoveDuplicatePods"); err != nil {
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
}
}
}
}
}
}
func getNodeAffinityNodeSelector(pod *v1.Pod) *v1.NodeSelector {
if pod.Spec.Affinity == nil {
return nil
}
if pod.Spec.Affinity.NodeAffinity == nil {
return nil
}
return pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
}
func getTargetNodes(podNodes map[string][]*v1.Pod, nodes []*v1.Node) []*v1.Node {
// In order to reduce the number of pods processed, identify pods which have
// equal (tolerations, nodeselectors, node affinity) terms and considered them
// as identical. Identical pods wrt. (tolerations, nodeselectors, node affinity) terms
// will produce the same result when checking if a pod is feasible for a node.
// Thus, improving efficiency of processing pods marked for eviction.
// Collect all distinct pods which differ in at least taints, node affinity or node selector terms
distinctPods := map[*v1.Pod]struct{}{}
for _, pods := range podNodes {
for _, pod := range pods {
duplicated := false
for dp := range distinctPods {
if utils.TolerationsEqual(pod.Spec.Tolerations, dp.Spec.Tolerations) &&
utils.NodeSelectorsEqual(getNodeAffinityNodeSelector(pod), getNodeAffinityNodeSelector(dp)) &&
reflect.DeepEqual(pod.Spec.NodeSelector, dp.Spec.NodeSelector) {
duplicated = true
continue
}
}
if duplicated {
continue
}
distinctPods[pod] = struct{}{}
}
}
// For each distinct pod get a list of nodes where it can land
targetNodesMap := map[string]*v1.Node{}
for pod := range distinctPods {
matchingNodes := map[string]*v1.Node{}
for _, node := range nodes {
if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
}) {
continue
}
if match, err := utils.PodMatchNodeSelector(pod, node); err == nil && !match {
continue
}
matchingNodes[node.Name] = node
}
if len(matchingNodes) > 1 {
for nodeName := range matchingNodes {
targetNodesMap[nodeName] = matchingNodes[nodeName]
}
}
}
targetNodes := []*v1.Node{}
for _, node := range targetNodesMap {
targetNodes = append(targetNodes, node)
}
return targetNodes
}
func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, strategy api.DeschedulerStrategy) bool {
if strategy.Params == nil || strategy.Params.RemoveDuplicates == nil {
return false

View File

@@ -20,46 +20,87 @@ import (
"context"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
func buildTestPodWithImage(podName, node, image string) *v1.Pod {
pod := test.BuildTestPod(podName, 100, 0, node, test.SetRSOwnerRef)
pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{
Name: image,
Image: image,
})
return pod
}
func TestFindDuplicatePods(t *testing.T) {
ctx := context.Background()
// first setup pods
node := test.BuildTestNode("n1", 2000, 3000, 10, nil)
p1 := test.BuildTestPod("p1", 100, 0, node.Name, nil)
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Key: "hardware",
Value: "gpu",
Effect: v1.TaintEffectNoSchedule,
},
}
})
node4 := test.BuildTestNode("n4", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"datacenter": "east",
}
})
node5 := test.BuildTestNode("n5", 2000, 3000, 10, func(node *v1.Node) {
node.Spec = v1.NodeSpec{
Unschedulable: true,
}
})
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p1.Namespace = "dev"
p2 := test.BuildTestPod("p2", 100, 0, node.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
p2.Namespace = "dev"
p3 := test.BuildTestPod("p3", 100, 0, node.Name, nil)
p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
p3.Namespace = "dev"
p4 := test.BuildTestPod("p4", 100, 0, node.Name, nil)
p5 := test.BuildTestPod("p5", 100, 0, node.Name, nil)
p6 := test.BuildTestPod("p6", 100, 0, node.Name, nil)
p7 := test.BuildTestPod("p7", 100, 0, node.Name, nil)
p4 := test.BuildTestPod("p4", 100, 0, node1.Name, nil)
p5 := test.BuildTestPod("p5", 100, 0, node1.Name, nil)
p6 := test.BuildTestPod("p6", 100, 0, node1.Name, nil)
p7 := test.BuildTestPod("p7", 100, 0, node1.Name, nil)
p7.Namespace = "kube-system"
p8 := test.BuildTestPod("p8", 100, 0, node.Name, nil)
p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil)
p8.Namespace = "test"
p9 := test.BuildTestPod("p9", 100, 0, node.Name, nil)
p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil)
p9.Namespace = "test"
p10 := test.BuildTestPod("p10", 100, 0, node.Name, nil)
p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil)
p10.Namespace = "test"
p11 := test.BuildTestPod("p11", 100, 0, node.Name, nil)
p11 := test.BuildTestPod("p11", 100, 0, node1.Name, nil)
p11.Namespace = "different-images"
p12 := test.BuildTestPod("p12", 100, 0, node.Name, nil)
p12 := test.BuildTestPod("p12", 100, 0, node1.Name, nil)
p12.Namespace = "different-images"
p13 := test.BuildTestPod("p13", 100, 0, node.Name, nil)
p13 := test.BuildTestPod("p13", 100, 0, node1.Name, nil)
p13.Namespace = "different-images"
p14 := test.BuildTestPod("p14", 100, 0, node.Name, nil)
p14 := test.BuildTestPod("p14", 100, 0, node1.Name, nil)
p14.Namespace = "different-images"
p15 := test.BuildTestPod("p15", 100, 0, node1.Name, nil)
p15.Namespace = "node-fit"
p16 := test.BuildTestPod("NOT1", 100, 0, node1.Name, nil)
p16.Namespace = "node-fit"
p17 := test.BuildTestPod("NOT2", 100, 0, node1.Name, nil)
p17.Namespace = "node-fit"
p18 := test.BuildTestPod("TARGET", 100, 0, node1.Name, nil)
p18.Namespace = "node-fit"
// ### Evictable Pods ###
@@ -113,100 +154,588 @@ func TestFindDuplicatePods(t *testing.T) {
Image: "foo",
})
// ### Pods Evictable Based On Node Fit ###
ownerRef3 := test.GetReplicaSetOwnerRefList()
p15.ObjectMeta.OwnerReferences = ownerRef3
p16.ObjectMeta.OwnerReferences = ownerRef3
p17.ObjectMeta.OwnerReferences = ownerRef3
p18.ObjectMeta.OwnerReferences = ownerRef3
p15.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
p16.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
p17.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
testCases := []struct {
description string
maxPodsToEvictPerNode int
pods []v1.Pod
expectedEvictedPodCount int
pods []*v1.Pod
nodes []*v1.Node
expectedEvictedPodCount uint
strategy api.DeschedulerStrategy
}{
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 2 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3},
expectedEvictedPodCount: 2,
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 1 should be evicted.",
pods: []*v1.Pod{p1, p2, p3},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 1,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet, but ReplicaSet kind is excluded. 0 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3},
pods: []*v1.Pod{p1, p2, p3},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}}}},
},
{
description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 2 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p8, *p9, *p10},
expectedEvictedPodCount: 2,
description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 1 should be evicted.",
pods: []*v1.Pod{p8, p9, p10},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 1,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three Pods in the `dev` Namespace, three Pods in the `test` Namespace. Bound to ReplicaSet with same name. 4 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3, *p8, *p9, *p10},
expectedEvictedPodCount: 4,
pods: []*v1.Pod{p1, p2, p3, p8, p9, p10},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 2,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods are: part of DaemonSet, with local storage, mirror pod annotation, critical pod annotation - none should be evicted.",
maxPodsToEvictPerNode: 2,
pods: []v1.Pod{*p4, *p5, *p6, *p7},
pods: []*v1.Pod{p4, p5, p6, p7},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Test all Pods: 4 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8, *p9, *p10},
expectedEvictedPodCount: 4,
pods: []*v1.Pod{p1, p2, p3, p4, p5, p6, p7, p8, p9, p10},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 2,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with the same owner but different images should not be evicted",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p11, *p12},
pods: []*v1.Pod{p11, p12},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with multiple containers should not match themselves",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p13},
pods: []*v1.Pod{p13},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with matching ownerrefs and at not all matching image should not trigger an eviction",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p11, *p13},
pods: []*v1.Pod{p11, p13},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. Only node available has a taint, and nodeFit set to true. 0 should be evicted.",
pods: []*v1.Pod{p1, p2, p3},
nodes: []*v1.Node{node1, node3},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
{
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet, all with a nodeSelector. Only node available has an incorrect node label, and nodeFit set to true. 0 should be evicted.",
pods: []*v1.Pod{p15, p16, p17},
nodes: []*v1.Node{node1, node4},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
{
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available is not schedulable, and nodeFit set to true. 0 should be evicted.",
pods: []*v1.Pod{p1, p2, p3},
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
}
for _, testCase := range testCases {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: testCase.pods}, nil
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
return true, node, nil
})
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
testCase.maxPodsToEvictPerNode,
[]*v1.Node{node},
false,
)
t.Run(testCase.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node}, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
}
var objs []runtime.Object
for _, node := range testCase.nodes {
objs = append(objs, node)
}
for _, pod := range testCase.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
nil,
nil,
testCase.nodes,
false,
false,
false,
false,
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
}
})
}
}
func TestRemoveDuplicatesUniformly(t *testing.T) {
setRSOwnerRef2 := func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-2"},
}
}
setTwoRSOwnerRef := func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-1"},
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-2"},
}
}
setTolerationsK1 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
pod.Spec.Tolerations = []v1.Toleration{
{
Key: "k1",
Value: "v1",
Operator: v1.TolerationOpEqual,
Effect: v1.TaintEffectNoSchedule,
},
}
}
setTolerationsK2 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
pod.Spec.Tolerations = []v1.Toleration{
{
Key: "k2",
Value: "v2",
Operator: v1.TolerationOpEqual,
Effect: v1.TaintEffectNoSchedule,
},
}
}
setMasterNoScheduleTaint := func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Effect: v1.TaintEffectNoSchedule,
Key: "node-role.kubernetes.io/master",
},
}
}
setMasterNoScheduleLabel := func(node *v1.Node) {
if node.ObjectMeta.Labels == nil {
node.ObjectMeta.Labels = map[string]string{}
}
node.ObjectMeta.Labels["node-role.kubernetes.io/master"] = ""
}
setWorkerLabel := func(node *v1.Node) {
if node.ObjectMeta.Labels == nil {
node.ObjectMeta.Labels = map[string]string{}
}
node.ObjectMeta.Labels["node-role.kubernetes.io/worker"] = "k1"
node.ObjectMeta.Labels["node-role.kubernetes.io/worker"] = "k2"
}
setNotMasterNodeSelectorK1 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
pod.Spec.Affinity = &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "node-role.kubernetes.io/master",
Operator: v1.NodeSelectorOpDoesNotExist,
},
{
Key: "k1",
Operator: v1.NodeSelectorOpDoesNotExist,
},
},
},
},
},
},
}
}
setNotMasterNodeSelectorK2 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
pod.Spec.Affinity = &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "node-role.kubernetes.io/master",
Operator: v1.NodeSelectorOpDoesNotExist,
},
{
Key: "k2",
Operator: v1.NodeSelectorOpDoesNotExist,
},
},
},
},
},
},
}
}
setWorkerLabelSelectorK1 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
if pod.Spec.NodeSelector == nil {
pod.Spec.NodeSelector = map[string]string{}
}
pod.Spec.NodeSelector["node-role.kubernetes.io/worker"] = "k1"
}
setWorkerLabelSelectorK2 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
if pod.Spec.NodeSelector == nil {
pod.Spec.NodeSelector = map[string]string{}
}
pod.Spec.NodeSelector["node-role.kubernetes.io/worker"] = "k2"
}
testCases := []struct {
description string
pods []*v1.Pod
nodes []*v1.Node
expectedEvictedPodCount uint
strategy api.DeschedulerStrategy
}{
{
description: "Evict pods uniformly",
pods: []*v1.Pod{
// (5,3,1) -> (3,3,3) -> 2 evictions
test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p3", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p4", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p5", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p6", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p7", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p8", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p9", 100, 0, "n3", test.SetRSOwnerRef),
},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly with one node left out",
pods: []*v1.Pod{
// (5,3,1) -> (4,4,1) -> 1 eviction
test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p3", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p4", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p5", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p6", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p7", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p8", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p9", 100, 0, "n3", test.SetRSOwnerRef),
},
expectedEvictedPodCount: 1,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly with two replica sets",
pods: []*v1.Pod{
// (5,3,1) -> (3,3,3) -> 2 evictions
test.BuildTestPod("p11", 100, 0, "n1", setTwoRSOwnerRef),
test.BuildTestPod("p12", 100, 0, "n1", setTwoRSOwnerRef),
test.BuildTestPod("p13", 100, 0, "n1", setTwoRSOwnerRef),
test.BuildTestPod("p14", 100, 0, "n1", setTwoRSOwnerRef),
test.BuildTestPod("p15", 100, 0, "n1", setTwoRSOwnerRef),
test.BuildTestPod("p16", 100, 0, "n2", setTwoRSOwnerRef),
test.BuildTestPod("p17", 100, 0, "n2", setTwoRSOwnerRef),
test.BuildTestPod("p18", 100, 0, "n2", setTwoRSOwnerRef),
test.BuildTestPod("p19", 100, 0, "n3", setTwoRSOwnerRef),
},
expectedEvictedPodCount: 4,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly with two owner references",
pods: []*v1.Pod{
// (5,3,1) -> (3,3,3) -> 2 evictions
test.BuildTestPod("p11", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p12", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p13", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p14", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p15", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p16", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p17", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p18", 100, 0, "n2", test.SetRSOwnerRef),
test.BuildTestPod("p19", 100, 0, "n3", test.SetRSOwnerRef),
// (1,3,5) -> (3,3,3) -> 2 evictions
test.BuildTestPod("p21", 100, 0, "n1", setRSOwnerRef2),
test.BuildTestPod("p22", 100, 0, "n2", setRSOwnerRef2),
test.BuildTestPod("p23", 100, 0, "n2", setRSOwnerRef2),
test.BuildTestPod("p24", 100, 0, "n2", setRSOwnerRef2),
test.BuildTestPod("p25", 100, 0, "n3", setRSOwnerRef2),
test.BuildTestPod("p26", 100, 0, "n3", setRSOwnerRef2),
test.BuildTestPod("p27", 100, 0, "n3", setRSOwnerRef2),
test.BuildTestPod("p28", 100, 0, "n3", setRSOwnerRef2),
test.BuildTestPod("p29", 100, 0, "n3", setRSOwnerRef2),
},
expectedEvictedPodCount: 4,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods with number of pods less than nodes",
pods: []*v1.Pod{
// (2,0,0) -> (1,1,0) -> 1 eviction
test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
},
expectedEvictedPodCount: 1,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods with number of pods less than nodes, but ignore different pods with the same ownerref",
pods: []*v1.Pod{
// (1, 0, 0) for "bar","baz" images -> no eviction, even with a matching ownerKey
// (2, 0, 0) for "foo" image -> (1,1,0) - 1 eviction
// In this case the only "real" duplicates are p1 and p4, so one of those should be evicted
buildTestPodWithImage("p1", "n1", "foo"),
buildTestPodWithImage("p2", "n1", "bar"),
buildTestPodWithImage("p3", "n1", "baz"),
buildTestPodWithImage("p4", "n1", "foo"),
},
expectedEvictedPodCount: 1,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods with a single pod with three nodes",
pods: []*v1.Pod{
// (2,0,0) -> (1,1,0) -> 1 eviction
test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
},
expectedEvictedPodCount: 0,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly respecting taints",
pods: []*v1.Pod{
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
test.BuildTestPod("p1", 100, 0, "worker1", setTolerationsK1),
test.BuildTestPod("p2", 100, 0, "worker1", setTolerationsK2),
test.BuildTestPod("p3", 100, 0, "worker1", setTolerationsK1),
test.BuildTestPod("p4", 100, 0, "worker1", setTolerationsK2),
test.BuildTestPod("p5", 100, 0, "worker1", setTolerationsK1),
test.BuildTestPod("p6", 100, 0, "worker2", setTolerationsK2),
test.BuildTestPod("p7", 100, 0, "worker2", setTolerationsK1),
test.BuildTestPod("p8", 100, 0, "worker2", setTolerationsK2),
test.BuildTestPod("p9", 100, 0, "worker3", setTolerationsK1),
},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
test.BuildTestNode("master1", 2000, 3000, 10, setMasterNoScheduleTaint),
test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleTaint),
test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleTaint),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly respecting RequiredDuringSchedulingIgnoredDuringExecution node affinity",
pods: []*v1.Pod{
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
test.BuildTestPod("p1", 100, 0, "worker1", setNotMasterNodeSelectorK1),
test.BuildTestPod("p2", 100, 0, "worker1", setNotMasterNodeSelectorK2),
test.BuildTestPod("p3", 100, 0, "worker1", setNotMasterNodeSelectorK1),
test.BuildTestPod("p4", 100, 0, "worker1", setNotMasterNodeSelectorK2),
test.BuildTestPod("p5", 100, 0, "worker1", setNotMasterNodeSelectorK1),
test.BuildTestPod("p6", 100, 0, "worker2", setNotMasterNodeSelectorK2),
test.BuildTestPod("p7", 100, 0, "worker2", setNotMasterNodeSelectorK1),
test.BuildTestPod("p8", 100, 0, "worker2", setNotMasterNodeSelectorK2),
test.BuildTestPod("p9", 100, 0, "worker3", setNotMasterNodeSelectorK1),
},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
test.BuildTestNode("master1", 2000, 3000, 10, setMasterNoScheduleLabel),
test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleLabel),
test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleLabel),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly respecting node selector",
pods: []*v1.Pod{
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
test.BuildTestPod("p1", 100, 0, "worker1", setWorkerLabelSelectorK1),
test.BuildTestPod("p2", 100, 0, "worker1", setWorkerLabelSelectorK2),
test.BuildTestPod("p3", 100, 0, "worker1", setWorkerLabelSelectorK1),
test.BuildTestPod("p4", 100, 0, "worker1", setWorkerLabelSelectorK2),
test.BuildTestPod("p5", 100, 0, "worker1", setWorkerLabelSelectorK1),
test.BuildTestPod("p6", 100, 0, "worker2", setWorkerLabelSelectorK2),
test.BuildTestPod("p7", 100, 0, "worker2", setWorkerLabelSelectorK1),
test.BuildTestPod("p8", 100, 0, "worker2", setWorkerLabelSelectorK2),
test.BuildTestPod("p9", 100, 0, "worker3", setWorkerLabelSelectorK1),
},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{
test.BuildTestNode("worker1", 2000, 3000, 10, setWorkerLabel),
test.BuildTestNode("worker2", 2000, 3000, 10, setWorkerLabel),
test.BuildTestNode("worker3", 2000, 3000, 10, setWorkerLabel),
test.BuildTestNode("master1", 2000, 3000, 10, nil),
test.BuildTestNode("master2", 2000, 3000, 10, nil),
test.BuildTestNode("master3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly respecting node selector with zero target nodes",
pods: []*v1.Pod{
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
test.BuildTestPod("p1", 100, 0, "worker1", setWorkerLabelSelectorK1),
test.BuildTestPod("p2", 100, 0, "worker1", setWorkerLabelSelectorK2),
test.BuildTestPod("p3", 100, 0, "worker1", setWorkerLabelSelectorK1),
test.BuildTestPod("p4", 100, 0, "worker1", setWorkerLabelSelectorK2),
test.BuildTestPod("p5", 100, 0, "worker1", setWorkerLabelSelectorK1),
test.BuildTestPod("p6", 100, 0, "worker2", setWorkerLabelSelectorK2),
test.BuildTestPod("p7", 100, 0, "worker2", setWorkerLabelSelectorK1),
test.BuildTestPod("p8", 100, 0, "worker2", setWorkerLabelSelectorK2),
test.BuildTestPod("p9", 100, 0, "worker3", setWorkerLabelSelectorK1),
},
expectedEvictedPodCount: 0,
nodes: []*v1.Node{
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
test.BuildTestNode("master1", 2000, 3000, 10, nil),
test.BuildTestNode("master2", 2000, 3000, 10, nil),
test.BuildTestNode("master3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range testCase.nodes {
objs = append(objs, node)
}
for _, pod := range testCase.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
podEvictor := evictions.NewPodEvictor(
fakeClient,
policyv1.SchemeGroupVersion.String(),
false,
nil,
nil,
testCase.nodes,
false,
false,
false,
false,
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
}
})
}
}

View File

@@ -0,0 +1,179 @@
package strategies
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation"
)
// validatedFailedPodsStrategyParams contains validated strategy parameters
type validatedFailedPodsStrategyParams struct {
validation.ValidatedStrategyParams
includingInitContainers bool
reasons sets.String
excludeOwnerKinds sets.String
minPodLifetimeSeconds *uint
}
// RemoveFailedPods removes Pods that are in failed status phase.
func RemoveFailedPods(
ctx context.Context,
client clientset.Interface,
strategy api.DeschedulerStrategy,
nodes []*v1.Node,
podEvictor *evictions.PodEvictor,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
) {
strategyParams, err := validateAndParseRemoveFailedPodsParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Invalid RemoveFailedPods parameters")
return
}
evictable := podEvictor.Evictable(
evictions.WithPriorityThreshold(strategyParams.ThresholdPriority),
evictions.WithNodeFit(strategyParams.NodeFit),
evictions.WithLabelSelector(strategyParams.LabelSelector),
)
var labelSelector *metav1.LabelSelector
if strategy.Params != nil {
labelSelector = strategy.Params.LabelSelector
}
podFilter, err := podutil.NewOptions().
WithFilter(evictable.IsEvictable).
WithNamespaces(strategyParams.IncludedNamespaces).
WithoutNamespaces(strategyParams.ExcludedNamespaces).
WithLabelSelector(labelSelector).
BuildFilterFunc()
if err != nil {
klog.ErrorS(err, "Error initializing pod filter function")
return
}
// Only list failed pods
phaseFilter := func(pod *v1.Pod) bool { return pod.Status.Phase == v1.PodFailed }
podFilter = podutil.WrapFilterFuncs(phaseFilter, podFilter)
for _, node := range nodes {
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListAllPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
if err != nil {
klog.ErrorS(err, "Error listing a nodes failed pods", "node", klog.KObj(node))
continue
}
for i, pod := range pods {
if err = validateFailedPodShouldEvict(pod, *strategyParams); err != nil {
klog.V(4).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(pod))
continue
}
if _, err = podEvictor.EvictPod(ctx, pods[i], node, "FailedPod"); err != nil {
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
}
}
}
}
func validateAndParseRemoveFailedPodsParams(
ctx context.Context,
client clientset.Interface,
params *api.StrategyParameters,
) (*validatedFailedPodsStrategyParams, error) {
if params == nil {
return &validatedFailedPodsStrategyParams{
ValidatedStrategyParams: validation.DefaultValidatedStrategyParams(),
}, nil
}
strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, params)
if err != nil {
return nil, err
}
var reasons, excludeOwnerKinds sets.String
var includingInitContainers bool
var minPodLifetimeSeconds *uint
if params.FailedPods != nil {
reasons = sets.NewString(params.FailedPods.Reasons...)
includingInitContainers = params.FailedPods.IncludingInitContainers
excludeOwnerKinds = sets.NewString(params.FailedPods.ExcludeOwnerKinds...)
minPodLifetimeSeconds = params.FailedPods.MinPodLifetimeSeconds
}
return &validatedFailedPodsStrategyParams{
ValidatedStrategyParams: *strategyParams,
includingInitContainers: includingInitContainers,
reasons: reasons,
excludeOwnerKinds: excludeOwnerKinds,
minPodLifetimeSeconds: minPodLifetimeSeconds,
}, nil
}
// validateFailedPodShouldEvict looks at strategy params settings to see if the Pod
// should be evicted given the params in the PodFailed policy.
func validateFailedPodShouldEvict(pod *v1.Pod, strategyParams validatedFailedPodsStrategyParams) error {
var errs []error
if strategyParams.minPodLifetimeSeconds != nil {
podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds())
if podAgeSeconds < *strategyParams.minPodLifetimeSeconds {
errs = append(errs, fmt.Errorf("pod does not exceed the min age seconds of %d", *strategyParams.minPodLifetimeSeconds))
}
}
if len(strategyParams.excludeOwnerKinds) > 0 {
ownerRefList := podutil.OwnerRef(pod)
for _, owner := range ownerRefList {
if strategyParams.excludeOwnerKinds.Has(owner.Kind) {
errs = append(errs, fmt.Errorf("pod's owner kind of %s is excluded", owner.Kind))
}
}
}
if len(strategyParams.reasons) > 0 {
reasons := getFailedContainerStatusReasons(pod.Status.ContainerStatuses)
if pod.Status.Phase == v1.PodFailed && pod.Status.Reason != "" {
reasons = append(reasons, pod.Status.Reason)
}
if strategyParams.includingInitContainers {
reasons = append(reasons, getFailedContainerStatusReasons(pod.Status.InitContainerStatuses)...)
}
if !strategyParams.reasons.HasAny(reasons...) {
errs = append(errs, fmt.Errorf("pod does not match any of the reasons"))
}
}
return utilerrors.NewAggregate(errs)
}
func getFailedContainerStatusReasons(containerStatuses []v1.ContainerStatus) []string {
reasons := make([]string, 0)
for _, containerStatus := range containerStatuses {
if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason != "" {
reasons = append(reasons, containerStatus.State.Waiting.Reason)
}
if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.Reason != "" {
reasons = append(reasons, containerStatus.State.Terminated.Reason)
}
}
return reasons
}

View File

@@ -0,0 +1,343 @@
package strategies
import (
"context"
"testing"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/test"
)
var (
OneHourInSeconds uint = 3600
)
func TestRemoveFailedPods(t *testing.T) {
createStrategy := func(enabled, includingInitContainers bool, reasons, excludeKinds []string, minAgeSeconds *uint, nodeFit bool) api.DeschedulerStrategy {
return api.DeschedulerStrategy{
Enabled: enabled,
Params: &api.StrategyParameters{
FailedPods: &api.FailedPods{
Reasons: reasons,
IncludingInitContainers: includingInitContainers,
ExcludeOwnerKinds: excludeKinds,
MinPodLifetimeSeconds: minAgeSeconds,
},
NodeFit: nodeFit,
},
}
}
tests := []struct {
description string
nodes []*v1.Node
strategy api.DeschedulerStrategy
expectedEvictedPodCount uint
pods []*v1.Pod
}{
{
description: "default empty strategy, 0 failures, 0 evictions",
strategy: api.DeschedulerStrategy{},
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{}, // no pods come back with field selector phase=Failed
},
{
description: "0 failures, 0 evictions",
strategy: createStrategy(true, false, nil, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{}, // no pods come back with field selector phase=Failed
},
{
description: "1 container terminated with reason NodeAffinity, 1 eviction",
strategy: createStrategy(true, false, nil, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}), nil),
},
},
{
description: "1 init container terminated with reason NodeAffinity, 1 eviction",
strategy: createStrategy(true, true, nil, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}, nil), nil),
},
},
{
description: "1 init container waiting with reason CreateContainerConfigError, 1 eviction",
strategy: createStrategy(true, true, nil, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"},
}, nil), nil),
},
},
{
description: "2 init container waiting with reason CreateContainerConfigError, 2 nodes, 2 evictions",
strategy: createStrategy(true, true, nil, nil, nil, false),
nodes: []*v1.Node{
test.BuildTestNode("node1", 2000, 3000, 10, nil),
test.BuildTestNode("node2", 2000, 3000, 10, nil),
},
expectedEvictedPodCount: 2,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}, nil), nil),
buildTestPod("p2", "node2", newPodStatus("", "", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}, nil), nil),
},
},
{
description: "include reason=CreateContainerConfigError, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}), nil),
},
},
{
description: "include reason=CreateContainerConfigError+NodeAffinity, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}), nil),
},
},
{
description: "include reason=CreateContainerConfigError, 1 container terminated with reason NodeAffinity, 0 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}), nil),
},
},
{
description: "include init container=false, 1 init container waiting with reason CreateContainerConfigError, 0 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"},
}, nil), nil),
},
},
{
description: "lifetime 1 hour, 1 container terminated with reason NodeAffinity, 0 eviction",
strategy: createStrategy(true, false, nil, nil, &OneHourInSeconds, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}), nil),
},
},
{
description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
strategy: createStrategy(true, false, nil, nil, nil, true),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, func(node *v1.Node) {
node.Spec.Unschedulable = true
})},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}), nil),
},
},
{
description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}, nil), nil),
},
},
{
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 eviction",
strategy: createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}, nil), nil),
},
},
{
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 pod in termination; nothing should be moved",
strategy: createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("", "", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}, nil), &metav1.Time{}),
},
},
{
description: "1 container terminated with reason ShutDown, 0 evictions",
strategy: createStrategy(true, false, nil, nil, nil, true),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("Shutdown", v1.PodFailed, nil, nil), nil),
},
},
{
description: "include reason=Shutdown, 2 containers terminated with reason ShutDown, 2 evictions",
strategy: createStrategy(true, false, []string{"Shutdown"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 2,
pods: []*v1.Pod{
buildTestPod("p1", "node1", newPodStatus("Shutdown", v1.PodFailed, nil, nil), nil),
buildTestPod("p2", "node1", newPodStatus("Shutdown", v1.PodFailed, nil, nil), nil),
},
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range tc.nodes {
objs = append(objs, node)
}
for _, pod := range tc.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
podEvictor := evictions.NewPodEvictor(
fakeClient,
policyv1.SchemeGroupVersion.String(),
false,
nil,
nil,
tc.nodes,
false,
false,
false,
false,
false,
)
RemoveFailedPods(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
}
})
}
}
func TestValidRemoveFailedPodsParams(t *testing.T) {
ctx := context.Background()
fakeClient := &fake.Clientset{}
testCases := []struct {
name string
params *api.StrategyParameters
}{
{name: "validate nil params", params: nil},
{name: "validate empty params", params: &api.StrategyParameters{}},
{name: "validate reasons params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
Reasons: []string{"CreateContainerConfigError"},
}}},
{name: "validate includingInitContainers params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
IncludingInitContainers: true,
}}},
{name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
ExcludeOwnerKinds: []string{"Job"},
}}},
{name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
MinPodLifetimeSeconds: &OneHourInSeconds,
}}},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
params, err := validateAndParseRemoveFailedPodsParams(ctx, fakeClient, tc.params)
if err != nil {
t.Errorf("strategy params should be valid but got err: %v", err.Error())
}
if params == nil {
t.Errorf("strategy params should return a ValidatedFailedPodsStrategyParams but got nil")
}
})
}
}
func newPodStatus(reason string, phase v1.PodPhase, initContainerState, containerState *v1.ContainerState) v1.PodStatus {
ps := v1.PodStatus{
Reason: reason,
Phase: phase,
}
if initContainerState != nil {
ps.InitContainerStatuses = []v1.ContainerStatus{{State: *initContainerState}}
ps.Phase = v1.PodFailed
}
if containerState != nil {
ps.ContainerStatuses = []v1.ContainerStatus{{State: *containerState}}
ps.Phase = v1.PodFailed
}
return ps
}
func buildTestPod(podName, nodeName string, podStatus v1.PodStatus, deletionTimestamp *metav1.Time) *v1.Pod {
pod := test.BuildTestPod(podName, 1, 1, nodeName, func(p *v1.Pod) {
p.Status = podStatus
})
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
pod.ObjectMeta.SetCreationTimestamp(metav1.Now())
pod.DeletionTimestamp = deletionTimestamp
return pod
}

View File

@@ -1,408 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"context"
"fmt"
"sort"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
// NodeUsageMap stores a node's info, pods on it and its resource usage
type NodeUsageMap struct {
node *v1.Node
usage api.ResourceThresholds
allPods []*v1.Pod
}
// NodePodsMap is a set of (node, pods) pairs
type NodePodsMap map[*v1.Node][]*v1.Pod
const (
// MinResourcePercentage is the minimum value of a resource's percentage
MinResourcePercentage = 0
// MaxResourcePercentage is the maximum value of a resource's percentage
MaxResourcePercentage = 100
)
// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used
// to calculate nodes' utilization and not the actual resource usage.
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
// todo: move to config validation?
// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
if strategy.Params == nil || strategy.Params.NodeResourceUtilizationThresholds == nil {
klog.V(1).Infof("NodeResourceUtilizationThresholds not set")
return
}
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if err := validateStrategyConfig(thresholds, targetThresholds); err != nil {
klog.Errorf("LowNodeUtilization config is not valid: %v", err)
return
}
// check if Pods/CPU/Mem are set, if not, set them to 100
if _, ok := thresholds[v1.ResourcePods]; !ok {
thresholds[v1.ResourcePods] = MaxResourcePercentage
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceCPU]; !ok {
thresholds[v1.ResourceCPU] = MaxResourcePercentage
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceMemory]; !ok {
thresholds[v1.ResourceMemory] = MaxResourcePercentage
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
}
npm := createNodePodsMap(ctx, client, nodes)
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds)
klog.V(1).Infof("Criteria for a node under utilization: CPU: %v, Mem: %v, Pods: %v",
thresholds[v1.ResourceCPU], thresholds[v1.ResourceMemory], thresholds[v1.ResourcePods])
if len(lowNodes) == 0 {
klog.V(1).Infof("No node is underutilized, nothing to do here, you might tune your thresholds further")
return
}
klog.V(1).Infof("Total number of underutilized nodes: %v", len(lowNodes))
if len(lowNodes) < strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
klog.V(1).Infof("number of nodes underutilized (%v) is less than NumberOfNodes (%v), nothing to do here", len(lowNodes), strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
return
}
if len(lowNodes) == len(nodes) {
klog.V(1).Infof("all nodes are underutilized, nothing to do here")
return
}
if len(targetNodes) == 0 {
klog.V(1).Infof("all nodes are under target utilization, nothing to do here")
return
}
klog.V(1).Infof("Criteria for a node above target utilization: CPU: %v, Mem: %v, Pods: %v",
targetThresholds[v1.ResourceCPU], targetThresholds[v1.ResourceMemory], targetThresholds[v1.ResourcePods])
klog.V(1).Infof("Total number of nodes above target utilization: %v", len(targetNodes))
evictPodsFromTargetNodes(
ctx,
targetNodes,
lowNodes,
targetThresholds,
podEvictor)
klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
}
// validateStrategyConfig checks if the strategy's config is valid
func validateStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
// validate thresholds and targetThresholds config
if err := validateThresholds(thresholds); err != nil {
return fmt.Errorf("thresholds config is not valid: %v", err)
}
if err := validateThresholds(targetThresholds); err != nil {
return fmt.Errorf("targetThresholds config is not valid: %v", err)
}
// validate if thresholds and targetThresholds have same resources configured
if len(thresholds) != len(targetThresholds) {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
}
for resourceName, value := range thresholds {
if targetValue, ok := targetThresholds[resourceName]; !ok {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
} else if value > targetValue {
return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
}
}
return nil
}
// validateThresholds checks if thresholds have valid resource name and resource percentage configured
func validateThresholds(thresholds api.ResourceThresholds) error {
if thresholds == nil || len(thresholds) == 0 {
return fmt.Errorf("no resource threshold is configured")
}
for name, percent := range thresholds {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
if percent < MinResourcePercentage || percent > MaxResourcePercentage {
return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage)
}
default:
return fmt.Errorf("only cpu, memory, or pods thresholds can be specified")
}
}
return nil
}
// classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
// low and high thresholds, it is simply ignored.
func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThresholds api.ResourceThresholds) ([]NodeUsageMap, []NodeUsageMap) {
lowNodes, targetNodes := []NodeUsageMap{}, []NodeUsageMap{}
for node, pods := range npm {
usage := nodeUtilization(node, pods)
nuMap := NodeUsageMap{
node: node,
usage: usage,
allPods: pods,
}
// Check if node is underutilized and if we can schedule pods on it.
if !nodeutil.IsNodeUnschedulable(node) && isNodeWithLowUtilization(usage, thresholds) {
klog.V(2).Infof("Node %#v is under utilized with usage: %#v", node.Name, usage)
lowNodes = append(lowNodes, nuMap)
} else if isNodeAboveTargetUtilization(usage, targetThresholds) {
klog.V(2).Infof("Node %#v is over utilized with usage: %#v", node.Name, usage)
targetNodes = append(targetNodes, nuMap)
} else {
klog.V(2).Infof("Node %#v is appropriately utilized with usage: %#v", node.Name, usage)
}
}
return lowNodes, targetNodes
}
// evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
// evicts them based on QoS as fallback option.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromTargetNodes(
ctx context.Context,
targetNodes, lowNodes []NodeUsageMap,
targetThresholds api.ResourceThresholds,
podEvictor *evictions.PodEvictor,
) {
sortNodesByUsage(targetNodes)
// upper bound on total number of pods/cpu/memory to be moved
var totalPods, totalCPU, totalMem float64
var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
for _, node := range lowNodes {
taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
nodeCapacity := node.node.Status.Capacity
if len(node.node.Status.Allocatable) > 0 {
nodeCapacity = node.node.Status.Allocatable
}
// totalPods to be moved
podsPercentage := targetThresholds[v1.ResourcePods] - node.usage[v1.ResourcePods]
totalPods += ((float64(podsPercentage) * float64(nodeCapacity.Pods().Value())) / 100)
// totalCPU capacity to be moved
cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
// totalMem capacity to be moved
memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
}
klog.V(1).Infof("Total capacity to be moved: CPU:%v, Mem:%v, Pods:%v", totalCPU, totalMem, totalPods)
klog.V(1).Infof("********Number of pods evicted from each node:***********")
for _, node := range targetNodes {
nodeCapacity := node.node.Status.Capacity
if len(node.node.Status.Allocatable) > 0 {
nodeCapacity = node.node.Status.Allocatable
}
klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
nonRemovablePods, removablePods := classifyPods(node.allPods, podEvictor)
klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, removablePods:%v", len(node.allPods), len(nonRemovablePods), len(removablePods))
if len(removablePods) == 0 {
klog.V(1).Infof("no removable pods on node %#v, try next node", node.node.Name)
continue
}
klog.V(1).Infof("evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
evictPods(ctx, removablePods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
klog.V(1).Infof("%v pods evicted from node %#v with usage %v", podEvictor.NodeEvicted(node.node), node.node.Name, node.usage)
}
}
func evictPods(
ctx context.Context,
inputPods []*v1.Pod,
targetThresholds api.ResourceThresholds,
nodeCapacity v1.ResourceList,
nodeUsage api.ResourceThresholds,
totalPods *float64,
totalCPU *float64,
totalMem *float64,
taintsOfLowNodes map[string][]v1.Taint,
podEvictor *evictions.PodEvictor,
node *v1.Node) {
// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
if isNodeAboveTargetUtilization(nodeUsage, targetThresholds) && *totalPods > 0 && *totalCPU > 0 && *totalMem > 0 {
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
for _, pod := range inputPods {
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).Infof("Skipping eviction for Pod: %#v, doesn't tolerate node taint", pod.Name)
continue
}
cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)
success, err := podEvictor.EvictPod(ctx, pod, node, "LowNodeUtilization")
if err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
break
}
if success {
klog.V(3).Infof("Evicted pod: %#v", pod.Name)
// update remaining pods
nodeUsage[v1.ResourcePods] -= onePodPercentage
*totalPods--
// update remaining cpu
*totalCPU -= float64(cUsage)
nodeUsage[v1.ResourceCPU] -= api.Percentage((float64(cUsage) * 100) / float64(nodeCapacity.Cpu().MilliValue()))
// update remaining memory
*totalMem -= float64(mUsage)
nodeUsage[v1.ResourceMemory] -= api.Percentage(float64(mUsage) / float64(nodeCapacity.Memory().Value()) * 100)
klog.V(3).Infof("updated node usage: %#v", nodeUsage)
// check if node utilization drops below target threshold or any required capacity (cpu, memory, pods) is moved
if !isNodeAboveTargetUtilization(nodeUsage, targetThresholds) || *totalPods <= 0 || *totalCPU <= 0 || *totalMem <= 0 {
break
}
}
}
}
}
// sortNodesByUsage sorts nodes based on usage in descending order
func sortNodesByUsage(nodes []NodeUsageMap) {
sort.Slice(nodes, func(i, j int) bool {
var ti, tj api.Percentage
for name, value := range nodes[i].usage {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
ti += value
}
}
for name, value := range nodes[j].usage {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
tj += value
}
}
// To return sorted in descending order
return ti > tj
})
}
// createNodePodsMap returns nodepodsmap with evictable pods on node.
func createNodePodsMap(ctx context.Context, client clientset.Interface, nodes []*v1.Node) NodePodsMap {
npm := NodePodsMap{}
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(ctx, client, node, nil)
if err != nil {
klog.Warningf("node %s will not be processed, error in accessing its pods (%#v)", node.Name, err)
} else {
npm[node] = pods
}
}
return npm
}
// isNodeAboveTargetUtilization checks if a node is overutilized
func isNodeAboveTargetUtilization(nodeThresholds api.ResourceThresholds, thresholds api.ResourceThresholds) bool {
for name, nodeValue := range nodeThresholds {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
if value, ok := thresholds[name]; !ok {
continue
} else if nodeValue > value {
return true
}
}
}
return false
}
// isNodeWithLowUtilization checks if a node is underutilized
func isNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds api.ResourceThresholds) bool {
for name, nodeValue := range nodeThresholds {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
if value, ok := thresholds[name]; !ok {
continue
} else if nodeValue > value {
return false
}
}
}
return true
}
func nodeUtilization(node *v1.Node, pods []*v1.Pod) api.ResourceThresholds {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: {},
v1.ResourceMemory: {},
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for name, quantity := range req {
if name == v1.ResourceCPU || name == v1.ResourceMemory {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
totalPods := len(pods)
return api.ResourceThresholds{
v1.ResourceCPU: api.Percentage((float64(totalReqs[v1.ResourceCPU].MilliValue()) * 100) / float64(nodeCapacity.Cpu().MilliValue())),
v1.ResourceMemory: api.Percentage(float64(totalReqs[v1.ResourceMemory].Value()) / float64(nodeCapacity.Memory().Value()) * 100),
v1.ResourcePods: api.Percentage((float64(totalPods) * 100) / float64(nodeCapacity.Pods().Value())),
}
}
func classifyPods(pods []*v1.Pod, evictor *evictions.PodEvictor) ([]*v1.Pod, []*v1.Pod) {
var nonRemovablePods, removablePods []*v1.Pod
for _, pod := range pods {
if !evictor.IsEvictable(pod) {
nonRemovablePods = append(nonRemovablePods, pod)
} else {
removablePods = append(removablePods, pod)
}
}
return nonRemovablePods, removablePods
}

View File

@@ -1,832 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"context"
"fmt"
"strings"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
var (
lowPriority = int32(0)
highPriority = int32(10000)
)
func TestLowNodeUtilization(t *testing.T) {
ctx := context.Background()
n1NodeName := "n1"
n2NodeName := "n2"
n3NodeName := "n3"
testCases := []struct {
name string
thresholds, targetThresholds api.ResourceThresholds
nodes map[string]*v1.Node
pods map[string]*v1.PodList
maxPodsToEvictPerNode int
expectedPodsEvicted int
evictedPods []string
}{
{
name: "no evictable pods",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetDSOwnerRef),
*test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetDSOwnerRef),
*test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetDSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetDSOwnerRef),
*test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 0, n1NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 0,
},
{
name: "without priorities",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
*test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 0, n1NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 4,
},
{
name: "without priorities stop when cpu capacity is depleted",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 300, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p2", 400, 300, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p3", 400, 300, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 400, 300, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p5", 400, 300, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
*test.BuildTestPod("p6", 400, 300, n1NodeName, test.SetDSOwnerRef),
*test.BuildTestPod("p7", 400, 300, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p8", 400, 300, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 2100, n1NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {},
},
maxPodsToEvictPerNode: 0,
// 4 pods available for eviction based on v1.ResourcePods, only 3 pods can be evicted before cpu is depleted
expectedPodsEvicted: 3,
},
{
name: "with priorities",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, highPriority)
}),
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, highPriority)
}),
*test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, highPriority)
}),
*test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, highPriority)
}),
*test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, lowPriority)
}),
// These won't be evicted.
*test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetDSOwnerRef(pod)
test.SetPodPriority(pod, highPriority)
}),
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
test.SetPodPriority(pod, lowPriority)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 0, n1NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 4,
},
{
name: "without priorities evicting best-effort pods only",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
// All pods are assumed to be burstable (test.BuildTestNode always sets both cpu/memory resource requests to some value)
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.MakeBestEffortPod(pod)
}),
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.MakeBestEffortPod(pod)
}),
*test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
}),
*test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.MakeBestEffortPod(pod)
}),
*test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.MakeBestEffortPod(pod)
}),
// These won't be evicted.
*test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetDSOwnerRef(pod)
}),
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 0, n1NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 4,
evictedPods: []string{"p1", "p2", "p4", "p5"},
},
}
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldString := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldString, n1NodeName) {
return true, test.pods[n1NodeName], nil
}
if strings.Contains(fieldString, n2NodeName) {
return true, test.pods[n2NodeName], nil
}
if strings.Contains(fieldString, n3NodeName) {
return true, test.pods[n3NodeName], nil
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.GetAction)
if node, exists := test.nodes[getAction.GetName()]; exists {
return true, node, nil
}
return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
})
podsForEviction := make(map[string]struct{})
for _, pod := range test.evictedPods {
podsForEviction[pod] = struct{}{}
}
evictionFailed := false
if len(test.evictedPods) > 0 {
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.CreateAction)
obj := getAction.GetObject()
if eviction, ok := obj.(*v1beta1.Eviction); ok {
if _, exists := podsForEviction[eviction.Name]; exists {
return true, obj, nil
}
evictionFailed = true
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
}
return true, obj, nil
})
}
var nodes []*v1.Node
for _, node := range test.nodes {
nodes = append(nodes, node)
}
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
test.maxPodsToEvictPerNode,
nodes,
false,
)
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
Thresholds: test.thresholds,
TargetThresholds: test.targetThresholds,
},
},
}
LowNodeUtilization(ctx, fakeClient, strategy, nodes, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if test.expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %#v pods to be evicted but %#v got evicted", test.expectedPodsEvicted, podsEvicted)
}
if evictionFailed {
t.Errorf("Pod evictions failed unexpectedly")
}
})
}
}
func TestValidateStrategyConfig(t *testing.T) {
tests := []struct {
name string
thresholds api.ResourceThresholds
targetThresholds api.ResourceThresholds
errInfo error
}{
{
name: "passing invalid thresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 120,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
},
{
name: "passing invalid targetThresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
"resourceInvalid": 80,
},
errInfo: fmt.Errorf("targetThresholds config is not valid: %v",
fmt.Errorf("only cpu, memory, or pods thresholds can be specified")),
},
{
name: "thresholds and targetThresholds configured different num of resources",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
v1.ResourcePods: 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "thresholds and targetThresholds configured different resources",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourcePods: 80,
},
errInfo: fmt.Errorf("thresholds and targetThresholds configured different resources"),
},
{
name: "thresholds' CPU config value is greater than targetThresholds'",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 90,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", v1.ResourceCPU),
},
{
name: "passing valid strategy config",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: nil,
},
}
for _, testCase := range tests {
validateErr := validateStrategyConfig(testCase.thresholds, testCase.targetThresholds)
if validateErr == nil || testCase.errInfo == nil {
if validateErr != testCase.errInfo {
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v\nto be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
} else if validateErr.Error() != testCase.errInfo.Error() {
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v\nto be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
}
}
func TestValidateThresholds(t *testing.T) {
tests := []struct {
name string
input api.ResourceThresholds
errInfo error
}{
{
name: "passing nil map for threshold",
input: nil,
errInfo: fmt.Errorf("no resource threshold is configured"),
},
{
name: "passing no threshold",
input: api.ResourceThresholds{},
errInfo: fmt.Errorf("no resource threshold is configured"),
},
{
name: "passing unsupported resource name",
input: api.ResourceThresholds{
v1.ResourceCPU: 40,
v1.ResourceStorage: 25.5,
},
errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"),
},
{
name: "passing invalid resource name",
input: api.ResourceThresholds{
v1.ResourceCPU: 40,
"coolResource": 42.0,
},
errInfo: fmt.Errorf("only cpu, memory, or pods thresholds can be specified"),
},
{
name: "passing invalid resource value",
input: api.ResourceThresholds{
v1.ResourceCPU: 110,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("%v threshold not in [%v, %v] range", v1.ResourceCPU, MinResourcePercentage, MaxResourcePercentage),
},
{
name: "passing a valid threshold with max and min resource value",
input: api.ResourceThresholds{
v1.ResourceCPU: 100,
v1.ResourceMemory: 0,
},
errInfo: nil,
},
{
name: "passing a valid threshold with only cpu",
input: api.ResourceThresholds{
v1.ResourceCPU: 80,
},
errInfo: nil,
},
{
name: "passing a valid threshold with cpu, memory and pods",
input: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 30,
v1.ResourcePods: 40,
},
errInfo: nil,
},
}
for _, test := range tests {
validateErr := validateThresholds(test.input)
if validateErr == nil || test.errInfo == nil {
if validateErr != test.errInfo {
t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.errInfo, validateErr)
}
} else if validateErr.Error() != test.errInfo.Error() {
t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.errInfo, validateErr)
}
}
}
func newFake(objects ...runtime.Object) *core.Fake {
scheme := runtime.NewScheme()
codecs := serializer.NewCodecFactory(scheme)
fake.AddToScheme(scheme)
o := core.NewObjectTracker(scheme, codecs.UniversalDecoder())
for _, obj := range objects {
if err := o.Add(obj); err != nil {
panic(err)
}
}
fakePtr := core.Fake{}
fakePtr.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
objs, err := o.List(
schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"},
action.GetNamespace(),
)
if err != nil {
return true, nil, err
}
obj := &v1.PodList{
Items: []v1.Pod{},
}
for _, pod := range objs.(*v1.PodList).Items {
podFieldSet := fields.Set(map[string]string{
"spec.nodeName": pod.Spec.NodeName,
"status.phase": string(pod.Status.Phase),
})
match := action.(core.ListAction).GetListRestrictions().Fields.Matches(podFieldSet)
if !match {
continue
}
obj.Items = append(obj.Items, *pod.DeepCopy())
}
return true, obj, nil
})
fakePtr.AddReactor("*", "*", core.ObjectReaction(o))
fakePtr.AddWatchReactor("*", core.DefaultWatchReactor(watch.NewFake(), nil))
return &fakePtr
}
func TestWithTaints(t *testing.T) {
ctx := context.Background()
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
Thresholds: api.ResourceThresholds{
v1.ResourcePods: 20,
},
TargetThresholds: api.ResourceThresholds{
v1.ResourcePods: 70,
},
},
},
}
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 1000, 3000, 10, nil)
n3 := test.BuildTestNode("n3", 1000, 3000, 10, nil)
n3withTaints := n3.DeepCopy()
n3withTaints.Spec.Taints = []v1.Taint{
{
Key: "key",
Value: "value",
Effect: v1.TaintEffectNoSchedule,
},
}
podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name, test.SetRSOwnerRef)
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
{
Key: "key",
Value: "value",
},
}
tests := []struct {
name string
nodes []*v1.Node
pods []*v1.Pod
evictionsExpected int
}{
{
name: "No taints",
nodes: []*v1.Node{n1, n2, n3},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
// Node 2 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n2.Name), 200, 0, n2.Name, test.SetRSOwnerRef),
},
evictionsExpected: 1,
},
{
name: "No pod tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, test.SetRSOwnerRef),
},
evictionsExpected: 0,
},
{
name: "Pod which tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
podThatToleratesTaint,
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, test.SetRSOwnerRef),
},
evictionsExpected: 1,
},
}
for _, item := range tests {
t.Run(item.name, func(t *testing.T) {
var objs []runtime.Object
for _, node := range item.nodes {
objs = append(objs, node)
}
for _, pod := range item.pods {
objs = append(objs, pod)
}
fakePtr := newFake(objs...)
var evictionCounter int
fakePtr.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() != "eviction" || action.GetResource().Resource != "pods" {
return false, nil, nil
}
evictionCounter++
return true, nil, nil
})
podEvictor := evictions.NewPodEvictor(
&fake.Clientset{Fake: *fakePtr},
"policy/v1",
false,
item.evictionsExpected,
item.nodes,
false,
)
LowNodeUtilization(ctx, &fake.Clientset{Fake: *fakePtr}, strategy, item.nodes, podEvictor)
if item.evictionsExpected != evictionCounter {
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, evictionCounter)
}
})
}
}

View File

@@ -18,8 +18,10 @@ package strategies
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
@@ -27,44 +29,92 @@ import (
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func validatePodsViolatingNodeAffinityParams(params *api.StrategyParameters) error {
if params == nil || len(params.NodeAffinityType) == 0 {
return fmt.Errorf("NodeAffinityType is empty")
}
// At most one of include/exclude can be set
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// RemovePodsViolatingNodeAffinity evicts pods on nodes which violate node affinity
func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
if strategy.Params == nil {
klog.V(1).Infof("NodeAffinityType not set")
func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
if err := validatePodsViolatingNodeAffinityParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid RemovePodsViolatingNodeAffinity parameters")
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
var includedNamespaces, excludedNamespaces sets.String
if strategy.Params.Namespaces != nil {
includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
podFilter, err := podutil.NewOptions().
WithNamespaces(includedNamespaces).
WithoutNamespaces(excludedNamespaces).
WithLabelSelector(strategy.Params.LabelSelector).
BuildFilterFunc()
if err != nil {
klog.ErrorS(err, "Error initializing pod filter function")
return
}
for _, nodeAffinity := range strategy.Params.NodeAffinityType {
klog.V(2).Infof("Executing for nodeAffinityType: %v", nodeAffinity)
klog.V(2).InfoS("Executing for nodeAffinityType", "nodeAffinity", nodeAffinity)
switch nodeAffinity {
case "requiredDuringSchedulingIgnoredDuringExecution":
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v\n", node.Name)
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListPodsOnANode(ctx, client, node, func(pod *v1.Pod) bool {
return podEvictor.IsEvictable(pod) &&
!nodeutil.PodFitsCurrentNode(pod, node) &&
nodeutil.PodFitsAnyNode(pod, nodes)
})
pods, err := podutil.ListPodsOnANode(
node.Name,
getPodsAssignedToNode,
podutil.WrapFilterFuncs(podFilter, func(pod *v1.Pod) bool {
return evictable.IsEvictable(pod) &&
!nodeutil.PodFitsCurrentNode(pod, node) &&
nodeutil.PodFitsAnyNode(pod, nodes)
}),
)
if err != nil {
klog.Errorf("failed to get pods from %v: %v", node.Name, err)
klog.ErrorS(err, "Failed to get pods", "node", klog.KObj(node))
}
for _, pod := range pods {
if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
klog.V(1).Infof("Evicting pod: %v", pod.Name)
klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
if _, err := podEvictor.EvictPod(ctx, pod, node, "NodeAffinity"); err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
klog.ErrorS(err, "Error evicting pod")
break
}
}
}
}
default:
klog.Errorf("invalid nodeAffinityType: %v", nodeAffinity)
klog.ErrorS(nil, "Invalid nodeAffinityType", "nodeAffinity", nodeAffinity)
}
}
klog.V(1).Infof("Evicted %v pods", podEvictor.TotalEvicted())
}

View File

@@ -20,17 +20,20 @@ import (
"context"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/test"
)
func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
ctx := context.Background()
requiredDuringSchedulingIgnoredDuringExecutionStrategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
@@ -40,6 +43,16 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
},
}
requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
NodeAffinityType: []string{
"requiredDuringSchedulingIgnoredDuringExecution",
},
NodeFit: true,
},
}
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
nodeWithLabels := test.BuildTestNode("nodeWithLabels", 2000, 3000, 10, nil)
@@ -48,10 +61,10 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
nodeWithoutLabels := test.BuildTestNode("nodeWithoutLabels", 2000, 3000, 10, nil)
unschedulableNodeWithLabels := test.BuildTestNode("unschedulableNodeWithLabels", 2000, 3000, 10, nil)
nodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue
unschedulableNodeWithLabels.Labels[nodeLabelKey] = nodeLabelValue
unschedulableNodeWithLabels.Spec.Unschedulable = true
addPodsToNode := func(node *v1.Node) []v1.Pod {
addPodsToNode := func(node *v1.Node, deletionTimestamp *metav1.Time) []*v1.Pod {
podWithNodeAffinity := test.BuildTestPod("podWithNodeAffinity", 100, 0, node.Name, nil)
podWithNodeAffinity.Spec.Affinity = &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
@@ -80,20 +93,25 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
pod1.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod2.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
return []v1.Pod{
*podWithNodeAffinity,
*pod1,
*pod2,
pod1.DeletionTimestamp = deletionTimestamp
pod2.DeletionTimestamp = deletionTimestamp
return []*v1.Pod{
podWithNodeAffinity,
pod1,
pod2,
}
}
var uint1 uint = 1
tests := []struct {
description string
nodes []*v1.Node
pods []v1.Pod
strategy api.DeschedulerStrategy
expectedEvictedPodCount int
maxPodsToEvictPerNode int
description string
nodes []*v1.Node
pods []*v1.Pod
strategy api.DeschedulerStrategy
expectedEvictedPodCount uint
maxPodsToEvictPerNode *uint
maxNoOfPodsToEvictPerNamespace *uint
}{
{
description: "Invalid strategy type, should not evict any pods",
@@ -106,64 +124,116 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
},
},
expectedEvictedPodCount: 0,
pods: addPodsToNode(nodeWithoutLabels),
pods: addPodsToNode(nodeWithoutLabels, nil),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxPodsToEvictPerNode: 0,
},
{
description: "Pod is correctly scheduled on node, no eviction expected",
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
expectedEvictedPodCount: 0,
pods: addPodsToNode(nodeWithLabels),
pods: addPodsToNode(nodeWithLabels, nil),
nodes: []*v1.Node{nodeWithLabels},
maxPodsToEvictPerNode: 0,
},
{
description: "Pod is scheduled on node without matching labels, another schedulable node available, should be evicted",
expectedEvictedPodCount: 1,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels),
pods: addPodsToNode(nodeWithoutLabels, nil),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxPodsToEvictPerNode: 0,
},
{
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, should not be evicted",
expectedEvictedPodCount: 1,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels),
pods: addPodsToNode(nodeWithoutLabels, nil),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxPodsToEvictPerNode: 1,
maxPodsToEvictPerNode: &uint1,
},
{
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, no pod evicted since pod terminting",
expectedEvictedPodCount: 1,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels, &metav1.Time{}),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxPodsToEvictPerNode: &uint1,
},
{
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxNoOfPodsToEvictPerNamespace set to 1, should not be evicted",
expectedEvictedPodCount: 1,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels, nil),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxNoOfPodsToEvictPerNamespace: &uint1,
},
{
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxNoOfPodsToEvictPerNamespace set to 1, no pod evicted since pod terminting",
expectedEvictedPodCount: 1,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels, &metav1.Time{}),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxNoOfPodsToEvictPerNamespace: &uint1,
},
{
description: "Pod is scheduled on node without matching labels, but no node where pod fits is available, should not evict",
expectedEvictedPodCount: 0,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels),
strategy: requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy,
pods: addPodsToNode(nodeWithoutLabels, nil),
nodes: []*v1.Node{nodeWithoutLabels, unschedulableNodeWithLabels},
maxPodsToEvictPerNode: 0,
},
{
description: "Pod is scheduled on node without matching labels, and node where pod fits is available, should evict",
expectedEvictedPodCount: 0,
strategy: requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy,
pods: addPodsToNode(nodeWithoutLabels, nil),
nodes: []*v1.Node{nodeWithLabels, unschedulableNodeWithLabels},
maxPodsToEvictPerNode: &uint1,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: tc.pods}, nil
var objs []runtime.Object
for _, node := range tc.nodes {
objs = append(objs, node)
}
for _, pod := range tc.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
podEvictor := evictions.NewPodEvictor(
fakeClient,
policyv1.SchemeGroupVersion.String(),
false,
tc.maxPodsToEvictPerNode,
tc.maxNoOfPodsToEvictPerNamespace,
tc.nodes,
false,
false,
false,
false,
false,
)
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor, getPodsAssignedToNode)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
}
})
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
tc.maxPodsToEvictPerNode,
tc.nodes,
false,
)
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
}
}
}

View File

@@ -18,22 +18,80 @@ package strategies
import (
"context"
"fmt"
"k8s.io/apimachinery/pkg/util/sets"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
)
func validateRemovePodsViolatingNodeTaintsParams(params *api.StrategyParameters) error {
if params == nil {
return nil
}
// At most one of include/exclude can be set
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// RemovePodsViolatingNodeTaints evicts pods on the node which violate NoSchedule Taints on nodes
func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
if err := validateRemovePodsViolatingNodeTaintsParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid RemovePodsViolatingNodeTaints parameters")
return
}
var includedNamespaces, excludedNamespaces sets.String
var labelSelector *metav1.LabelSelector
if strategy.Params != nil {
if strategy.Params.Namespaces != nil {
includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
}
labelSelector = strategy.Params.LabelSelector
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
podFilter, err := podutil.NewOptions().
WithFilter(evictable.IsEvictable).
WithNamespaces(includedNamespaces).
WithoutNamespaces(excludedNamespaces).
WithLabelSelector(labelSelector).
BuildFilterFunc()
if err != nil {
klog.ErrorS(err, "Error initializing pod filter function")
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v\n", node.Name)
pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable)
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListAllPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
if err != nil {
//no pods evicted as error encountered retrieving evictable Pods
return
@@ -45,9 +103,9 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
node.Spec.Taints,
func(taint *v1.Taint) bool { return taint.Effect == v1.TaintEffectNoSchedule },
) {
klog.V(2).Infof("Not all taints with NoSchedule effect are tolerated after update for pod %v on node %v", pods[i].Name, node.Name)
klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
if _, err := podEvictor.EvictPod(ctx, pods[i], node, "NodeTaint"); err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
klog.ErrorS(err, "Error evicting pod")
break
}
}

View File

@@ -5,13 +5,16 @@ import (
"fmt"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
@@ -44,11 +47,21 @@ func addTolerationToPod(pod *v1.Pod, key, value string, index int) *v1.Pod {
}
func TestDeletePodsViolatingNodeTaints(t *testing.T) {
ctx := context.Background()
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
node1 = addTaintsToNode(node1, "testTaint", "test", []int{1})
node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
node1 = addTaintsToNode(node2, "testingTaint", "testing", []int{1})
node2 = addTaintsToNode(node2, "testingTaint", "testing", []int{1})
node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"datacenter": "east",
}
})
node4 := test.BuildTestNode("n4", 2000, 3000, 10, func(node *v1.Node) {
node.Spec = v1.NodeSpec{
Unschedulable: true,
}
})
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
@@ -69,12 +82,15 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
p10 := test.BuildTestPod("p10", 100, 0, node2.Name, nil)
p11 := test.BuildTestPod("p11", 100, 0, node2.Name, nil)
p11.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p12 := test.BuildTestPod("p11", 100, 0, node2.Name, nil)
p12.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
// The following 4 pods won't get evicted.
// A Critical Pod.
p7.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
p7.Spec.Priority = &priority
p7.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
// A daemonset.
p8.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
@@ -97,89 +113,172 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
p3 = addTolerationToPod(p3, "testTaint", "test", 1)
p4 = addTolerationToPod(p4, "testTaintX", "testX", 1)
p12.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
var uint1 uint = 1
tests := []struct {
description string
nodes []*v1.Node
pods []v1.Pod
evictLocalStoragePods bool
maxPodsToEvictPerNode int
expectedEvictedPodCount int
description string
nodes []*v1.Node
pods []*v1.Pod
evictLocalStoragePods bool
evictSystemCriticalPods bool
maxPodsToEvictPerNode *uint
maxNoOfPodsToEvictPerNamespace *uint
expectedEvictedPodCount uint
nodeFit bool
}{
{
description: "Pods not tolerating node taint should be evicted",
pods: []v1.Pod{*p1, *p2, *p3},
pods: []*v1.Pod{p1, p2, p3},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
maxPodsToEvictPerNode: 0,
evictSystemCriticalPods: false,
expectedEvictedPodCount: 1, //p2 gets evicted
},
{
description: "Pods with tolerations but not tolerating node taint should be evicted",
pods: []v1.Pod{*p1, *p3, *p4},
pods: []*v1.Pod{p1, p3, p4},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
maxPodsToEvictPerNode: 0,
evictSystemCriticalPods: false,
expectedEvictedPodCount: 1, //p4 gets evicted
},
{
description: "Only <maxPodsToEvictPerNode> number of Pods not tolerating node taint should be evicted",
pods: []v1.Pod{*p1, *p5, *p6},
pods: []*v1.Pod{p1, p5, p6},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
maxPodsToEvictPerNode: 1,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: &uint1,
expectedEvictedPodCount: 1, //p5 or p6 gets evicted
},
{
description: "Only <maxNoOfPodsToEvictPerNamespace> number of Pods not tolerating node taint should be evicted",
pods: []*v1.Pod{p1, p5, p6},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
maxNoOfPodsToEvictPerNamespace: &uint1,
expectedEvictedPodCount: 1, //p5 or p6 gets evicted
},
{
description: "Critical pods not tolerating node taint should not be evicted",
pods: []v1.Pod{*p7, *p8, *p9, *p10},
pods: []*v1.Pod{p7, p8, p9, p10},
nodes: []*v1.Node{node2},
evictLocalStoragePods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 0,
evictSystemCriticalPods: false,
expectedEvictedPodCount: 0, //nothing is evicted
},
{
description: "Critical pods except storage pods not tolerating node taint should not be evicted",
pods: []v1.Pod{*p7, *p8, *p9, *p10},
pods: []*v1.Pod{p7, p8, p9, p10},
nodes: []*v1.Node{node2},
evictLocalStoragePods: true,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 1,
evictSystemCriticalPods: false,
expectedEvictedPodCount: 1, //p9 gets evicted
},
{
description: "Critical and non critical pods, only non critical pods not tolerating node taint should be evicted",
pods: []v1.Pod{*p7, *p8, *p10, *p11},
pods: []*v1.Pod{p7, p8, p10, p11},
nodes: []*v1.Node{node2},
evictLocalStoragePods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 1,
evictSystemCriticalPods: false,
expectedEvictedPodCount: 1, //p11 gets evicted
},
{
description: "Critical and non critical pods, pods not tolerating node taint should be evicted even if they are critical",
pods: []*v1.Pod{p2, p7, p9, p10},
nodes: []*v1.Node{node1, node2},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
expectedEvictedPodCount: 2, //p2 and p7 are evicted
},
{
description: "Pod p2 doesn't tolerate taint on it's node, but also doesn't tolerate taints on other nodes",
pods: []*v1.Pod{p1, p2, p3},
nodes: []*v1.Node{node1, node2},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
expectedEvictedPodCount: 0, //p2 gets evicted
nodeFit: true,
},
{
description: "Pod p12 doesn't tolerate taint on it's node, but other nodes don't match it's selector",
pods: []*v1.Pod{p1, p3, p12},
nodes: []*v1.Node{node1, node3},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
expectedEvictedPodCount: 0, //p2 gets evicted
nodeFit: true,
},
{
description: "Pod p2 doesn't tolerate taint on it's node, but other nodes are unschedulable",
pods: []*v1.Pod{p1, p2, p3},
nodes: []*v1.Node{node1, node4},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
expectedEvictedPodCount: 0, //p2 gets evicted
nodeFit: true,
},
}
for _, tc := range tests {
t.Run(tc.description, func(t *testing.T) {
// create fake client
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: tc.pods}, nil
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range tc.nodes {
objs = append(objs, node)
}
for _, pod := range tc.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
podEvictor := evictions.NewPodEvictor(
fakeClient,
policyv1.SchemeGroupVersion.String(),
false,
tc.maxPodsToEvictPerNode,
tc.maxNoOfPodsToEvictPerNamespace,
tc.nodes,
tc.evictLocalStoragePods,
tc.evictSystemCriticalPods,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
Params: &api.StrategyParameters{
NodeFit: tc.nodeFit,
},
}
RemovePodsViolatingNodeTaints(ctx, fakeClient, strategy, tc.nodes, podEvictor, getPodsAssignedToNode)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount)
}
})
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
tc.maxPodsToEvictPerNode,
tc.nodes,
tc.evictLocalStoragePods,
)
RemovePodsViolatingNodeTaints(ctx, fakeClient, api.DeschedulerStrategy{}, tc.nodes, podEvictor)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount)
}
}
}
func TestToleratesTaint(t *testing.T) {

View File

@@ -0,0 +1,166 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
// HighNodeUtilization evicts pods from under utilized nodes so that scheduler can schedule according to its strategy.
// Note that CPU/Memory requests are used to calculate nodes' utilization and not the actual resource usage.
func HighNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
if err := validateNodeUtilizationParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid HighNodeUtilization parameters")
return
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if err := validateHighUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
klog.ErrorS(err, "HighNodeUtilization config is not valid")
return
}
targetThresholds = make(api.ResourceThresholds)
setDefaultForThresholds(thresholds, targetThresholds)
resourceNames := getResourceNames(targetThresholds)
sourceNodes, highNodes := classifyNodes(
getNodeUsage(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode),
func(node *v1.Node, usage NodeUsage) bool {
return isNodeWithLowUtilization(usage)
},
func(node *v1.Node, usage NodeUsage) bool {
if nodeutil.IsNodeUnschedulable(node) {
klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(node))
return false
}
return !isNodeWithLowUtilization(usage)
})
// log message in one line
keysAndValues := []interface{}{
"CPU", thresholds[v1.ResourceCPU],
"Mem", thresholds[v1.ResourceMemory],
"Pods", thresholds[v1.ResourcePods],
}
for name := range thresholds {
if !isBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
}
}
klog.V(1).InfoS("Criteria for a node below target utilization", keysAndValues...)
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(sourceNodes))
if len(sourceNodes) == 0 {
klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
return
}
if len(sourceNodes) <= strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
return
}
if len(sourceNodes) == len(nodes) {
klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
return
}
if len(highNodes) == 0 {
klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
return
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
// stop if the total available usage has dropped to zero - no more pods can be scheduled
continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
for name := range totalAvailableUsage {
if totalAvailableUsage[name].CmpInt64(0) < 1 {
return false
}
}
return true
}
evictPodsFromSourceNodes(
ctx,
sourceNodes,
highNodes,
podEvictor,
evictable.IsEvictable,
resourceNames,
"HighNodeUtilization",
continueEvictionCond)
}
func validateHighUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
if targetThresholds != nil {
return fmt.Errorf("targetThresholds is not applicable for HighNodeUtilization")
}
if err := validateThresholds(thresholds); err != nil {
return fmt.Errorf("thresholds config is not valid: %v", err)
}
return nil
}
func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds) {
// check if Pods/CPU/Mem are set, if not, set them to 100
if _, ok := thresholds[v1.ResourcePods]; !ok {
thresholds[v1.ResourcePods] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceCPU]; !ok {
thresholds[v1.ResourceCPU] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceMemory]; !ok {
thresholds[v1.ResourceMemory] = MaxResourcePercentage
}
// Default targetThreshold resource values to 100
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
for name := range thresholds {
if !isBasicResource(name) {
targetThresholds[name] = MaxResourcePercentage
}
}
}

View File

@@ -0,0 +1,685 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"context"
"fmt"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
func TestHighNodeUtilization(t *testing.T) {
n1NodeName := "n1"
n2NodeName := "n2"
n3NodeName := "n3"
nodeSelectorKey := "datacenter"
nodeSelectorValue := "west"
testCases := []struct {
name string
thresholds api.ResourceThresholds
nodes []*v1.Node
pods []*v1.Pod
expectedPodsEvicted uint
evictedPods []string
}{
{
name: "no node below threshold usage",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourcePods: 20,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil),
},
pods: []*v1.Pod{
// These won't be evicted.
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p8", 400, 0, n3NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p9", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 0,
},
{
name: "no evictable pods",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 40,
v1.ResourcePods: 40,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil),
},
pods: []*v1.Pod{
// These won't be evicted.
test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
// These won't be evicted.
test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p5", 400, 0, n3NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p6", 400, 0, n3NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p8", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 0,
},
{
name: "no node to schedule evicted pods",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourcePods: 20,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
// These can't be evicted.
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These can't be evicted.
test.BuildTestPod("p2", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n3NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n3NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 0,
},
{
name: "without priorities",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
// These won't be evicted.
test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 2,
evictedPods: []string{"p1", "p7"},
},
{
name: "without priorities stop when resource capacity is depleted",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 2000, 3000, 10, nil),
test.BuildTestNode(n2NodeName, 2000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 2000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p2", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p6", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 1,
},
{
name: "with priorities",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n2NodeName, 2000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 2000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, lowPriority)
}),
test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, highPriority)
}),
// These won't be evicted.
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p7", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p8", 400, 0, n2NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p9", 400, 0, n3NodeName, test.SetDSOwnerRef),
},
expectedPodsEvicted: 1,
evictedPods: []string{"p1"},
},
{
name: "without priorities evicting best-effort pods only",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 3000, 3000, 10, nil),
test.BuildTestNode(n2NodeName, 3000, 3000, 5, nil),
test.BuildTestNode(n3NodeName, 3000, 3000, 10, test.SetNodeUnschedulable),
},
// All pods are assumed to be burstable (test.BuildTestNode always sets both cpu/memory resource requests to some value)
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.MakeBestEffortPod(pod)
}),
test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
}),
// These won't be evicted.
test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 1,
evictedPods: []string{"p1"},
},
{
name: "with extended resource",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
extendedResource: 40,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 8)
}),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 8)
}),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
// These won't be evicted
test.BuildTestPod("p3", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
test.BuildTestPod("p4", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
test.BuildTestPod("p5", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
test.BuildTestPod("p6", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
},
expectedPodsEvicted: 2,
evictedPods: []string{"p1", "p2"},
},
{
name: "with extended resource in some of nodes",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 40,
extendedResource: 40,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 8)
}),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
//These won't be evicted
test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
test.BuildTestPod("p3", 500, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 500, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 500, 0, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p6", 500, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 0,
},
{
name: "Other node match pod node selector",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 9, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeSelectorKey: nodeSelectorValue,
}
}),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p5", 400, 0, n2NodeName, func(pod *v1.Pod) {
// A pod selecting nodes in the "west" datacenter
test.SetRSOwnerRef(pod)
pod.Spec.NodeSelector = map[string]string{
nodeSelectorKey: nodeSelectorValue,
}
}),
},
expectedPodsEvicted: 1,
},
{
name: "Other node does not match pod node selector",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p5", 400, 0, n2NodeName, func(pod *v1.Pod) {
// A pod selecting nodes in the "west" datacenter
test.SetRSOwnerRef(pod)
pod.Spec.NodeSelector = map[string]string{
nodeSelectorKey: nodeSelectorValue,
}
}),
},
expectedPodsEvicted: 0,
},
}
for _, testCase := range testCases {
t.Run(testCase.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range testCase.nodes {
objs = append(objs, node)
}
for _, pod := range testCase.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
podsForEviction := make(map[string]struct{})
for _, pod := range testCase.evictedPods {
podsForEviction[pod] = struct{}{}
}
evictionFailed := false
if len(testCase.evictedPods) > 0 {
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.CreateAction)
obj := getAction.GetObject()
if eviction, ok := obj.(*v1beta1.Eviction); ok {
if _, exists := podsForEviction[eviction.Name]; exists {
return true, obj, nil
}
evictionFailed = true
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
}
return true, obj, nil
})
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
//fakeClient := &fake.Clientset{}
//fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
// list := action.(core.ListAction)
// fieldString := list.GetListRestrictions().Fields.String()
// if strings.Contains(fieldString, n1NodeName) {
// return true, test.pods[n1NodeName], nil
// }
// if strings.Contains(fieldString, n2NodeName) {
// return true, test.pods[n2NodeName], nil
// }
// if strings.Contains(fieldString, n3NodeName) {
// return true, test.pods[n3NodeName], nil
// }
// return true, nil, fmt.Errorf("Failed to list: %v", list)
//})
//fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
// getAction := action.(core.GetAction)
// if node, exists := testCase.nodes[getAction.GetName()]; exists {
// return true, node, nil
// }
// return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
//})
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
nil,
nil,
testCase.nodes,
false,
false,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
Thresholds: testCase.thresholds,
},
NodeFit: true,
},
}
HighNodeUtilization(ctx, fakeClient, strategy, testCase.nodes, podEvictor, getPodsAssignedToNode)
podsEvicted := podEvictor.TotalEvicted()
if testCase.expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %v pods to be evicted but %v got evicted", testCase.expectedPodsEvicted, podsEvicted)
}
if evictionFailed {
t.Errorf("Pod evictions failed unexpectedly")
}
})
}
}
func TestValidateHighNodeUtilizationStrategyConfig(t *testing.T) {
tests := []struct {
name string
thresholds api.ResourceThresholds
targetThresholds api.ResourceThresholds
errInfo error
}{
{
name: "passing target thresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("targetThresholds is not applicable for HighNodeUtilization"),
},
{
name: "passing empty thresholds",
thresholds: api.ResourceThresholds{},
errInfo: fmt.Errorf("thresholds config is not valid: no resource threshold is configured"),
},
{
name: "passing invalid thresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 120,
},
errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
},
{
name: "passing valid strategy config",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: nil,
},
{
name: "passing valid strategy config with extended resource",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
extendedResource: 80,
},
errInfo: nil,
},
}
for _, testCase := range tests {
validateErr := validateHighUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds)
if validateErr == nil || testCase.errInfo == nil {
if validateErr != testCase.errInfo {
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
} else if validateErr.Error() != testCase.errInfo.Error() {
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
}
}
func TestHighNodeUtilizationWithTaints(t *testing.T) {
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
Thresholds: api.ResourceThresholds{
v1.ResourceCPU: 40,
},
},
},
}
n1 := test.BuildTestNode("n1", 1000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 1000, 3000, 10, nil)
n3 := test.BuildTestNode("n3", 1000, 3000, 10, nil)
n3withTaints := n3.DeepCopy()
n3withTaints.Spec.Taints = []v1.Taint{
{
Key: "key",
Value: "value",
Effect: v1.TaintEffectNoSchedule,
},
}
podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name, test.SetRSOwnerRef)
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
{
Key: "key",
Value: "value",
},
}
tests := []struct {
name string
nodes []*v1.Node
pods []*v1.Pod
evictionsExpected uint
}{
{
name: "No taints",
nodes: []*v1.Node{n1, n2, n3},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
// Node 2 pods
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n2.Name), 200, 0, n2.Name, test.SetRSOwnerRef),
},
evictionsExpected: 1,
},
{
name: "No pod tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, test.SetRSOwnerRef),
},
evictionsExpected: 0,
},
{
name: "Pod which tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 100, 0, n1.Name, test.SetRSOwnerRef),
podThatToleratesTaint,
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 500, 0, n3withTaints.Name, test.SetRSOwnerRef),
},
evictionsExpected: 1,
},
}
for _, item := range tests {
t.Run(item.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range item.nodes {
objs = append(objs, node)
}
for _, pod := range item.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
podEvictor := evictions.NewPodEvictor(
fakeClient,
"policy/v1",
false,
&item.evictionsExpected,
nil,
item.nodes,
false,
false,
false,
false,
false,
)
HighNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor, getPodsAssignedToNode)
if item.evictionsExpected != podEvictor.TotalEvicted() {
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, podEvictor.TotalEvicted())
}
})
}
}

View File

@@ -0,0 +1,189 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
// LowNodeUtilization evicts pods from overutilized nodes to underutilized nodes. Note that CPU/Memory requests are used
// to calculate nodes' utilization and not the actual resource usage.
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
if err := validateNodeUtilizationParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid LowNodeUtilization parameters")
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if err := validateLowUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
klog.ErrorS(err, "LowNodeUtilization config is not valid")
return
}
// check if Pods/CPU/Mem are set, if not, set them to 100
if _, ok := thresholds[v1.ResourcePods]; !ok {
thresholds[v1.ResourcePods] = MaxResourcePercentage
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceCPU]; !ok {
thresholds[v1.ResourceCPU] = MaxResourcePercentage
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceMemory]; !ok {
thresholds[v1.ResourceMemory] = MaxResourcePercentage
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
}
resourceNames := getResourceNames(thresholds)
lowNodes, sourceNodes := classifyNodes(
getNodeUsage(nodes, thresholds, targetThresholds, resourceNames, getPodsAssignedToNode),
// The node has to be schedulable (to be able to move workload there)
func(node *v1.Node, usage NodeUsage) bool {
if nodeutil.IsNodeUnschedulable(node) {
klog.V(2).InfoS("Node is unschedulable, thus not considered as underutilized", "node", klog.KObj(node))
return false
}
return isNodeWithLowUtilization(usage)
},
func(node *v1.Node, usage NodeUsage) bool {
return isNodeAboveTargetUtilization(usage)
},
)
// log message in one line
keysAndValues := []interface{}{
"CPU", thresholds[v1.ResourceCPU],
"Mem", thresholds[v1.ResourceMemory],
"Pods", thresholds[v1.ResourcePods],
}
for name := range thresholds {
if !isBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
}
}
klog.V(1).InfoS("Criteria for a node under utilization", keysAndValues...)
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(lowNodes))
// log message in one line
keysAndValues = []interface{}{
"CPU", targetThresholds[v1.ResourceCPU],
"Mem", targetThresholds[v1.ResourceMemory],
"Pods", targetThresholds[v1.ResourcePods],
}
for name := range targetThresholds {
if !isBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(targetThresholds[name]))
}
}
klog.V(1).InfoS("Criteria for a node above target utilization", keysAndValues...)
klog.V(1).InfoS("Number of overutilized nodes", "totalNumber", len(sourceNodes))
if len(lowNodes) == 0 {
klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
return
}
if len(lowNodes) <= strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(lowNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
return
}
if len(lowNodes) == len(nodes) {
klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
return
}
if len(sourceNodes) == 0 {
klog.V(1).InfoS("All nodes are under target utilization, nothing to do here")
return
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
// stop if node utilization drops below target threshold or any of required capacity (cpu, memory, pods) is moved
continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
if !isNodeAboveTargetUtilization(nodeUsage) {
return false
}
for name := range totalAvailableUsage {
if totalAvailableUsage[name].CmpInt64(0) < 1 {
return false
}
}
return true
}
evictPodsFromSourceNodes(
ctx,
sourceNodes,
lowNodes,
podEvictor,
evictable.IsEvictable,
resourceNames,
"LowNodeUtilization",
continueEvictionCond)
klog.V(1).InfoS("Total number of pods evicted", "evictedPods", podEvictor.TotalEvicted())
}
// validateLowUtilizationStrategyConfig checks if the strategy's config is valid
func validateLowUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
// validate thresholds and targetThresholds config
if err := validateThresholds(thresholds); err != nil {
return fmt.Errorf("thresholds config is not valid: %v", err)
}
if err := validateThresholds(targetThresholds); err != nil {
return fmt.Errorf("targetThresholds config is not valid: %v", err)
}
// validate if thresholds and targetThresholds have same resources configured
if len(thresholds) != len(targetThresholds) {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
}
for resourceName, value := range thresholds {
if targetValue, ok := targetThresholds[resourceName]; !ok {
return fmt.Errorf("thresholds and targetThresholds configured different resources")
} else if value > targetValue {
return fmt.Errorf("thresholds' %v percentage is greater than targetThresholds'", resourceName)
}
}
return nil
}

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,406 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"context"
"fmt"
"sort"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
// NodeUsage stores a node's info, pods on it, thresholds and its resource usage
type NodeUsage struct {
node *v1.Node
usage map[v1.ResourceName]*resource.Quantity
allPods []*v1.Pod
lowResourceThreshold map[v1.ResourceName]*resource.Quantity
highResourceThreshold map[v1.ResourceName]*resource.Quantity
}
type continueEvictionCond func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool
// NodePodsMap is a set of (node, pods) pairs
type NodePodsMap map[*v1.Node][]*v1.Pod
const (
// MinResourcePercentage is the minimum value of a resource's percentage
MinResourcePercentage = 0
// MaxResourcePercentage is the maximum value of a resource's percentage
MaxResourcePercentage = 100
)
func validateNodeUtilizationParams(params *api.StrategyParameters) error {
if params == nil || params.NodeResourceUtilizationThresholds == nil {
return fmt.Errorf("NodeResourceUtilizationThresholds not set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// validateThresholds checks if thresholds have valid resource name and resource percentage configured
func validateThresholds(thresholds api.ResourceThresholds) error {
if thresholds == nil || len(thresholds) == 0 {
return fmt.Errorf("no resource threshold is configured")
}
for name, percent := range thresholds {
if percent < MinResourcePercentage || percent > MaxResourcePercentage {
return fmt.Errorf("%v threshold not in [%v, %v] range", name, MinResourcePercentage, MaxResourcePercentage)
}
}
return nil
}
func getNodeUsage(
nodes []*v1.Node,
lowThreshold, highThreshold api.ResourceThresholds,
resourceNames []v1.ResourceName,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
) []NodeUsage {
var nodeUsageList []NodeUsage
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, nil)
if err != nil {
klog.V(2).InfoS("Node will not be processed, error accessing its pods", "node", klog.KObj(node), "err", err)
continue
}
// A threshold is in percentages but in <0;100> interval.
// Performing `threshold * 0.01` will convert <0;100> interval into <0;1>.
// Multiplying it with capacity will give fraction of the capacity corresponding to the given high/low resource threshold in Quantity units.
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
lowResourceThreshold := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(int64(float64(lowThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(int64(float64(lowThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(int64(float64(lowThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI),
}
for _, name := range resourceNames {
if !isBasicResource(name) {
cap := nodeCapacity[name]
lowResourceThreshold[name] = resource.NewQuantity(int64(float64(lowThreshold[name])*float64(cap.Value())*0.01), resource.DecimalSI)
}
}
highResourceThreshold := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(int64(float64(highThreshold[v1.ResourceCPU])*float64(nodeCapacity.Cpu().MilliValue())*0.01), resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(int64(float64(highThreshold[v1.ResourceMemory])*float64(nodeCapacity.Memory().Value())*0.01), resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(int64(float64(highThreshold[v1.ResourcePods])*float64(nodeCapacity.Pods().Value())*0.01), resource.DecimalSI),
}
for _, name := range resourceNames {
if !isBasicResource(name) {
cap := nodeCapacity[name]
highResourceThreshold[name] = resource.NewQuantity(int64(float64(highThreshold[name])*float64(cap.Value())*0.01), resource.DecimalSI)
}
}
nodeUsageList = append(nodeUsageList, NodeUsage{
node: node,
usage: nodeUtilization(node, pods, resourceNames),
allPods: pods,
lowResourceThreshold: lowResourceThreshold,
highResourceThreshold: highResourceThreshold,
})
}
return nodeUsageList
}
func resourceUsagePercentages(nodeUsage NodeUsage) map[v1.ResourceName]float64 {
nodeCapacity := nodeUsage.node.Status.Capacity
if len(nodeUsage.node.Status.Allocatable) > 0 {
nodeCapacity = nodeUsage.node.Status.Allocatable
}
resourceUsagePercentage := map[v1.ResourceName]float64{}
for resourceName, resourceUsage := range nodeUsage.usage {
cap := nodeCapacity[resourceName]
if !cap.IsZero() {
resourceUsagePercentage[resourceName] = 100 * float64(resourceUsage.MilliValue()) / float64(cap.MilliValue())
}
}
return resourceUsagePercentage
}
// classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
// low and high thresholds, it is simply ignored.
func classifyNodes(
nodeUsages []NodeUsage,
lowThresholdFilter, highThresholdFilter func(node *v1.Node, usage NodeUsage) bool,
) ([]NodeUsage, []NodeUsage) {
lowNodes, highNodes := []NodeUsage{}, []NodeUsage{}
for _, nodeUsage := range nodeUsages {
if lowThresholdFilter(nodeUsage.node, nodeUsage) {
klog.V(2).InfoS("Node is underutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
lowNodes = append(lowNodes, nodeUsage)
} else if highThresholdFilter(nodeUsage.node, nodeUsage) {
klog.V(2).InfoS("Node is overutilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
highNodes = append(highNodes, nodeUsage)
} else {
klog.V(2).InfoS("Node is appropriately utilized", "node", klog.KObj(nodeUsage.node), "usage", nodeUsage.usage, "usagePercentage", resourceUsagePercentages(nodeUsage))
}
}
return lowNodes, highNodes
}
// evictPodsFromSourceNodes evicts pods based on priority, if all the pods on the node have priority, if not
// evicts them based on QoS as fallback option.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromSourceNodes(
ctx context.Context,
sourceNodes, destinationNodes []NodeUsage,
podEvictor *evictions.PodEvictor,
podFilter func(pod *v1.Pod) bool,
resourceNames []v1.ResourceName,
strategy string,
continueEviction continueEvictionCond,
) {
sortNodesByUsage(sourceNodes)
// upper bound on total number of pods/cpu/memory and optional extended resources to be moved
totalAvailableUsage := map[v1.ResourceName]*resource.Quantity{
v1.ResourcePods: {},
v1.ResourceCPU: {},
v1.ResourceMemory: {},
}
var taintsOfDestinationNodes = make(map[string][]v1.Taint, len(destinationNodes))
for _, node := range destinationNodes {
taintsOfDestinationNodes[node.node.Name] = node.node.Spec.Taints
for _, name := range resourceNames {
if _, ok := totalAvailableUsage[name]; !ok {
totalAvailableUsage[name] = resource.NewQuantity(0, resource.DecimalSI)
}
totalAvailableUsage[name].Add(*node.highResourceThreshold[name])
totalAvailableUsage[name].Sub(*node.usage[name])
}
}
// log message in one line
keysAndValues := []interface{}{
"CPU", totalAvailableUsage[v1.ResourceCPU].MilliValue(),
"Mem", totalAvailableUsage[v1.ResourceMemory].Value(),
"Pods", totalAvailableUsage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !isBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}
klog.V(1).InfoS("Total capacity to be moved", keysAndValues...)
for _, node := range sourceNodes {
klog.V(3).InfoS("Evicting pods from node", "node", klog.KObj(node.node), "usage", node.usage)
nonRemovablePods, removablePods := classifyPods(node.allPods, podFilter)
klog.V(2).InfoS("Pods on node", "node", klog.KObj(node.node), "allPods", len(node.allPods), "nonRemovablePods", len(nonRemovablePods), "removablePods", len(removablePods))
if len(removablePods) == 0 {
klog.V(1).InfoS("No removable pods on node, try next node", "node", klog.KObj(node.node))
continue
}
klog.V(1).InfoS("Evicting pods based on priority, if they have same priority, they'll be evicted based on QoS tiers")
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
podutil.SortPodsBasedOnPriorityLowToHigh(removablePods)
evictPods(ctx, removablePods, node, totalAvailableUsage, taintsOfDestinationNodes, podEvictor, strategy, continueEviction)
klog.V(1).InfoS("Evicted pods from node", "node", klog.KObj(node.node), "evictedPods", podEvictor.NodeEvicted(node.node), "usage", node.usage)
}
}
func evictPods(
ctx context.Context,
inputPods []*v1.Pod,
nodeUsage NodeUsage,
totalAvailableUsage map[v1.ResourceName]*resource.Quantity,
taintsOfLowNodes map[string][]v1.Taint,
podEvictor *evictions.PodEvictor,
strategy string,
continueEviction continueEvictionCond,
) {
if continueEviction(nodeUsage, totalAvailableUsage) {
for _, pod := range inputPods {
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).InfoS("Skipping eviction for pod, doesn't tolerate node taint", "pod", klog.KObj(pod))
continue
}
success, err := podEvictor.EvictPod(ctx, pod, nodeUsage.node, strategy)
if err != nil {
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
}
if success {
klog.V(3).InfoS("Evicted pods", "pod", klog.KObj(pod), "err", err)
for name := range totalAvailableUsage {
if name == v1.ResourcePods {
nodeUsage.usage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
totalAvailableUsage[name].Sub(*resource.NewQuantity(1, resource.DecimalSI))
} else {
quantity := utils.GetResourceRequestQuantity(pod, name)
nodeUsage.usage[name].Sub(quantity)
totalAvailableUsage[name].Sub(quantity)
}
}
keysAndValues := []interface{}{
"node", nodeUsage.node.Name,
"CPU", nodeUsage.usage[v1.ResourceCPU].MilliValue(),
"Mem", nodeUsage.usage[v1.ResourceMemory].Value(),
"Pods", nodeUsage.usage[v1.ResourcePods].Value(),
}
for name := range totalAvailableUsage {
if !isBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), totalAvailableUsage[name].Value())
}
}
klog.V(3).InfoS("Updated node usage", keysAndValues...)
// check if pods can be still evicted
if !continueEviction(nodeUsage, totalAvailableUsage) {
break
}
}
}
}
}
// sortNodesByUsage sorts nodes based on usage in descending order
func sortNodesByUsage(nodes []NodeUsage) {
sort.Slice(nodes, func(i, j int) bool {
ti := nodes[i].usage[v1.ResourceMemory].Value() + nodes[i].usage[v1.ResourceCPU].MilliValue() + nodes[i].usage[v1.ResourcePods].Value()
tj := nodes[j].usage[v1.ResourceMemory].Value() + nodes[j].usage[v1.ResourceCPU].MilliValue() + nodes[j].usage[v1.ResourcePods].Value()
// extended resources
for name := range nodes[i].usage {
if !isBasicResource(name) {
ti = ti + nodes[i].usage[name].Value()
tj = tj + nodes[j].usage[name].Value()
}
}
// To return sorted in descending order
return ti > tj
})
}
// isNodeAboveTargetUtilization checks if a node is overutilized
// At least one resource has to be above the high threshold
func isNodeAboveTargetUtilization(usage NodeUsage) bool {
for name, nodeValue := range usage.usage {
// usage.highResourceThreshold[name] < nodeValue
if usage.highResourceThreshold[name].Cmp(*nodeValue) == -1 {
return true
}
}
return false
}
// isNodeWithLowUtilization checks if a node is underutilized
// All resources have to be below the low threshold
func isNodeWithLowUtilization(usage NodeUsage) bool {
for name, nodeValue := range usage.usage {
// usage.lowResourceThreshold[name] < nodeValue
if usage.lowResourceThreshold[name].Cmp(*nodeValue) == -1 {
return false
}
}
return true
}
// getResourceNames returns list of resource names in resource thresholds
func getResourceNames(thresholds api.ResourceThresholds) []v1.ResourceName {
resourceNames := make([]v1.ResourceName, 0, len(thresholds))
for name := range thresholds {
resourceNames = append(resourceNames, name)
}
return resourceNames
}
// isBasicResource checks if resource is basic native.
func isBasicResource(name v1.ResourceName) bool {
switch name {
case v1.ResourceCPU, v1.ResourceMemory, v1.ResourcePods:
return true
default:
return false
}
}
func nodeUtilization(node *v1.Node, pods []*v1.Pod, resourceNames []v1.ResourceName) map[v1.ResourceName]*resource.Quantity {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(0, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(0, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(int64(len(pods)), resource.DecimalSI),
}
for _, name := range resourceNames {
if !isBasicResource(name) {
totalReqs[name] = resource.NewQuantity(0, resource.DecimalSI)
}
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for _, name := range resourceNames {
quantity, ok := req[name]
if ok && name != v1.ResourcePods {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
return totalReqs
}
func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*v1.Pod) {
var nonRemovablePods, removablePods []*v1.Pod
for _, pod := range pods {
if !filter(pod) {
nonRemovablePods = append(nonRemovablePods, pod)
} else {
removablePods = append(removablePods, pod)
}
}
return nonRemovablePods, removablePods
}

View File

@@ -0,0 +1,158 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"math"
"sigs.k8s.io/descheduler/pkg/api"
"testing"
)
var (
lowPriority = int32(0)
highPriority = int32(10000)
extendedResource = v1.ResourceName("example.com/foo")
)
func TestValidateThresholds(t *testing.T) {
tests := []struct {
name string
input api.ResourceThresholds
errInfo error
}{
{
name: "passing nil map for threshold",
input: nil,
errInfo: fmt.Errorf("no resource threshold is configured"),
},
{
name: "passing no threshold",
input: api.ResourceThresholds{},
errInfo: fmt.Errorf("no resource threshold is configured"),
},
{
name: "passing extended resource name other than cpu/memory/pods",
input: api.ResourceThresholds{
v1.ResourceCPU: 40,
extendedResource: 50,
},
errInfo: nil,
},
{
name: "passing invalid resource value",
input: api.ResourceThresholds{
v1.ResourceCPU: 110,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("%v threshold not in [%v, %v] range", v1.ResourceCPU, MinResourcePercentage, MaxResourcePercentage),
},
{
name: "passing a valid threshold with max and min resource value",
input: api.ResourceThresholds{
v1.ResourceCPU: 100,
v1.ResourceMemory: 0,
},
errInfo: nil,
},
{
name: "passing a valid threshold with only cpu",
input: api.ResourceThresholds{
v1.ResourceCPU: 80,
},
errInfo: nil,
},
{
name: "passing a valid threshold with cpu, memory and pods",
input: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 30,
v1.ResourcePods: 40,
},
errInfo: nil,
},
{
name: "passing a valid threshold with only extended resource",
input: api.ResourceThresholds{
extendedResource: 80,
},
errInfo: nil,
},
{
name: "passing a valid threshold with cpu, memory, pods and extended resource",
input: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 30,
v1.ResourcePods: 40,
extendedResource: 50,
},
errInfo: nil,
},
}
for _, test := range tests {
validateErr := validateThresholds(test.input)
if validateErr == nil || test.errInfo == nil {
if validateErr != test.errInfo {
t.Errorf("expected validity of threshold: %#v to be %v but got %v instead", test.input, test.errInfo, validateErr)
}
} else if validateErr.Error() != test.errInfo.Error() {
t.Errorf("expected validity of threshold: %#v to be %v but got %v instead", test.input, test.errInfo, validateErr)
}
}
}
func TestResourceUsagePercentages(t *testing.T) {
resourceUsagePercentage := resourceUsagePercentages(NodeUsage{
node: &v1.Node{
Status: v1.NodeStatus{
Capacity: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(2000, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(3977868*1024, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(29, resource.BinarySI),
},
Allocatable: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(1930, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(3287692*1024, resource.BinarySI),
v1.ResourcePods: *resource.NewQuantity(29, resource.BinarySI),
},
},
},
usage: map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: resource.NewMilliQuantity(1220, resource.DecimalSI),
v1.ResourceMemory: resource.NewQuantity(3038982964, resource.BinarySI),
v1.ResourcePods: resource.NewQuantity(11, resource.BinarySI),
},
})
expectedUsageInIntPercentage := map[v1.ResourceName]float64{
v1.ResourceCPU: 63,
v1.ResourceMemory: 90,
v1.ResourcePods: 37,
}
for resourceName, percentage := range expectedUsageInIntPercentage {
if math.Floor(resourceUsagePercentage[resourceName]) != percentage {
t.Errorf("Incorrect percentange computation, expected %v, got math.Floor(%v) instead", percentage, resourceUsagePercentage[resourceName])
}
}
t.Logf("resourceUsagePercentage: %#v\n", resourceUsagePercentage)
}

View File

@@ -18,22 +18,79 @@ package strategies
import (
"context"
"fmt"
"k8s.io/apimachinery/pkg/util/sets"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
)
func validateRemovePodsViolatingInterPodAntiAffinityParams(params *api.StrategyParameters) error {
if params == nil {
return nil
}
// At most one of include/exclude can be set
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// RemovePodsViolatingInterPodAntiAffinity evicts pods on the node which are having a pod affinity rules.
func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
if err := validateRemovePodsViolatingInterPodAntiAffinityParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid RemovePodsViolatingInterPodAntiAffinity parameters")
return
}
var includedNamespaces, excludedNamespaces sets.String
var labelSelector *metav1.LabelSelector
if strategy.Params != nil {
if strategy.Params.Namespaces != nil {
includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
}
labelSelector = strategy.Params.LabelSelector
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
podFilter, err := podutil.NewOptions().
WithNamespaces(includedNamespaces).
WithoutNamespaces(excludedNamespaces).
WithLabelSelector(labelSelector).
BuildFilterFunc()
if err != nil {
klog.ErrorS(err, "Error initializing pod filter function")
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v\n", node.Name)
pods, err := podutil.ListPodsOnANode(ctx, client, node, podEvictor.IsEvictable)
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListPodsOnANode(node.Name, getPodsAssignedToNode, podFilter)
if err != nil {
return
}
@@ -41,10 +98,10 @@ func RemovePodsViolatingInterPodAntiAffinity(ctx context.Context, client clients
podutil.SortPodsBasedOnPriorityLowToHigh(pods)
totalPods := len(pods)
for i := 0; i < totalPods; i++ {
if checkPodsWithAntiAffinityExist(pods[i], pods) {
if checkPodsWithAntiAffinityExist(pods[i], pods) && evictable.IsEvictable(pods[i]) {
success, err := podEvictor.EvictPod(ctx, pods[i], node, "InterPodAntiAffinity")
if err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
klog.ErrorS(err, "Error evicting pod")
break
}
@@ -69,7 +126,7 @@ func checkPodsWithAntiAffinityExist(pod *v1.Pod, pods []*v1.Pod) bool {
namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.Infof("%v", err)
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}
for _, existingPod := range pods {

View File

@@ -20,30 +20,56 @@ import (
"context"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
func TestPodAntiAffinity(t *testing.T) {
ctx := context.Background()
node := test.BuildTestNode("n1", 2000, 3000, 10, nil)
p1 := test.BuildTestPod("p1", 100, 0, node.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node.Name, nil)
p3 := test.BuildTestPod("p3", 100, 0, node.Name, nil)
p4 := test.BuildTestPod("p4", 100, 0, node.Name, nil)
p5 := test.BuildTestPod("p5", 100, 0, node.Name, nil)
p6 := test.BuildTestPod("p6", 100, 0, node.Name, nil)
p7 := test.BuildTestPod("p7", 100, 0, node.Name, nil)
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
node2 := test.BuildTestNode("n2", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"datacenter": "east",
}
})
node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
node.Spec = v1.NodeSpec{
Unschedulable: true,
}
})
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
p4 := test.BuildTestPod("p4", 100, 0, node1.Name, nil)
p5 := test.BuildTestPod("p5", 100, 0, node1.Name, nil)
p6 := test.BuildTestPod("p6", 100, 0, node1.Name, nil)
p7 := test.BuildTestPod("p7", 100, 0, node1.Name, nil)
p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil)
p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil)
p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil)
p9.DeletionTimestamp = &metav1.Time{}
p10.DeletionTimestamp = &metav1.Time{}
criticalPriority := utils.SystemCriticalPriority
nonEvictablePod := test.BuildTestPod("non-evict", 100, 0, node1.Name, func(pod *v1.Pod) {
pod.Spec.Priority = &criticalPriority
})
p2.Labels = map[string]string{"foo": "bar"}
p5.Labels = map[string]string{"foo": "bar"}
p6.Labels = map[string]string{"foo": "bar"}
p7.Labels = map[string]string{"foo1": "bar1"}
nonEvictablePod.Labels = map[string]string{"foo": "bar"}
test.SetNormalOwnerRef(p1)
test.SetNormalOwnerRef(p2)
test.SetNormalOwnerRef(p3)
@@ -51,6 +77,8 @@ func TestPodAntiAffinity(t *testing.T) {
test.SetNormalOwnerRef(p5)
test.SetNormalOwnerRef(p6)
test.SetNormalOwnerRef(p7)
test.SetNormalOwnerRef(p9)
test.SetNormalOwnerRef(p10)
// set pod anti affinity
setPodAntiAffinity(p1, "foo", "bar")
@@ -59,62 +87,146 @@ func TestPodAntiAffinity(t *testing.T) {
setPodAntiAffinity(p5, "foo1", "bar1")
setPodAntiAffinity(p6, "foo1", "bar1")
setPodAntiAffinity(p7, "foo", "bar")
setPodAntiAffinity(p9, "foo", "bar")
setPodAntiAffinity(p10, "foo", "bar")
// set pod priority
test.SetPodPriority(p5, 100)
test.SetPodPriority(p6, 50)
test.SetPodPriority(p7, 0)
// Set pod node selectors
p8.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
var uint1 uint = 1
var uint3 uint = 3
tests := []struct {
description string
maxPodsToEvictPerNode int
pods []v1.Pod
expectedEvictedPodCount int
description string
maxPodsToEvictPerNode *uint
maxNoOfPodsToEvictPerNamespace *uint
pods []*v1.Pod
expectedEvictedPodCount uint
nodeFit bool
nodes []*v1.Node
}{
{
description: "Maximum pods to evict - 0",
maxPodsToEvictPerNode: 0,
pods: []v1.Pod{*p1, *p2, *p3, *p4},
pods: []*v1.Pod{p1, p2, p3, p4},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 3,
},
{
description: "Maximum pods to evict - 3",
maxPodsToEvictPerNode: 3,
pods: []v1.Pod{*p1, *p2, *p3, *p4},
maxPodsToEvictPerNode: &uint3,
pods: []*v1.Pod{p1, p2, p3, p4},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 3,
},
{
description: "Maximum pods to evict (maxPodsToEvictPerNamespace=3) - 3",
maxNoOfPodsToEvictPerNamespace: &uint3,
pods: []*v1.Pod{p1, p2, p3, p4},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 3,
},
{
description: "Evict only 1 pod after sorting",
maxPodsToEvictPerNode: 0,
pods: []v1.Pod{*p5, *p6, *p7},
pods: []*v1.Pod{p5, p6, p7},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 1,
},
{
description: "Evicts pod that conflicts with critical pod (but does not evict critical pod)",
maxPodsToEvictPerNode: &uint1,
pods: []*v1.Pod{p1, nonEvictablePod},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 1,
},
{
description: "Evicts pod that conflicts with critical pod (but does not evict critical pod)",
maxPodsToEvictPerNode: &uint1,
pods: []*v1.Pod{p1, nonEvictablePod},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 1,
},
{
description: "Won't evict pods because node selectors don't match available nodes",
maxPodsToEvictPerNode: &uint1,
pods: []*v1.Pod{p8, nonEvictablePod},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
nodeFit: true,
},
{
description: "Won't evict pods because only other node is not schedulable",
maxPodsToEvictPerNode: &uint1,
pods: []*v1.Pod{p8, nonEvictablePod},
nodes: []*v1.Node{node1, node3},
expectedEvictedPodCount: 0,
nodeFit: true,
},
{
description: "No pod to evicted since all pod terminating",
pods: []*v1.Pod{p9, p10},
nodes: []*v1.Node{node1},
expectedEvictedPodCount: 0,
},
}
for _, test := range tests {
// create fake client
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: test.pods}, nil
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
return true, node, nil
})
t.Run(test.description, func(t *testing.T) {
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
test.maxPodsToEvictPerNode,
[]*v1.Node{node},
false,
)
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
RemovePodsViolatingInterPodAntiAffinity(ctx, fakeClient, api.DeschedulerStrategy{}, []*v1.Node{node}, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != test.expectedEvictedPodCount {
t.Errorf("Unexpected no of pods evicted: pods evicted: %d, expected: %d", podsEvicted, test.expectedEvictedPodCount)
}
var objs []runtime.Object
for _, node := range test.nodes {
objs = append(objs, node)
}
for _, pod := range test.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
podInformer := sharedInformerFactory.Core().V1().Pods()
getPodsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Errorf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
podEvictor := evictions.NewPodEvictor(
fakeClient,
policyv1.SchemeGroupVersion.String(),
false,
test.maxPodsToEvictPerNode,
test.maxNoOfPodsToEvictPerNamespace,
test.nodes,
false,
false,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
Params: &api.StrategyParameters{
NodeFit: test.nodeFit,
},
}
RemovePodsViolatingInterPodAntiAffinity(ctx, fakeClient, strategy, test.nodes, podEvictor, getPodsAssignedToNode)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != test.expectedEvictedPodCount {
t.Errorf("Unexpected no of pods evicted: pods evicted: %d, expected: %d", podsEvicted, test.expectedEvictedPodCount)
}
})
}
}

View File

@@ -18,51 +18,122 @@ package strategies
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
v1meta "k8s.io/apimachinery/pkg/apis/meta/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func validatePodLifeTimeParams(params *api.StrategyParameters) error {
if params == nil || params.PodLifeTime == nil || params.PodLifeTime.MaxPodLifeTimeSeconds == nil {
return fmt.Errorf("MaxPodLifeTimeSeconds not set")
}
if params.PodLifeTime.PodStatusPhases != nil {
for _, phase := range params.PodLifeTime.PodStatusPhases {
if phase != string(v1.PodPending) && phase != string(v1.PodRunning) {
return fmt.Errorf("only Pending and Running phases are supported in PodLifeTime")
}
}
}
// At most one of include/exclude can be set
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// PodLifeTime evicts pods on nodes that were created more than strategy.Params.MaxPodLifeTimeSeconds seconds ago.
func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
if strategy.Params == nil || strategy.Params.MaxPodLifeTimeSeconds == nil {
klog.V(1).Infof("MaxPodLifeTimeSeconds not set")
func PodLifeTime(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor, getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc) {
if err := validatePodLifeTimeParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid PodLifeTime parameters")
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
var includedNamespaces, excludedNamespaces sets.String
if strategy.Params.Namespaces != nil {
includedNamespaces = sets.NewString(strategy.Params.Namespaces.Include...)
excludedNamespaces = sets.NewString(strategy.Params.Namespaces.Exclude...)
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority))
filter := evictable.IsEvictable
if strategy.Params.PodLifeTime.PodStatusPhases != nil {
filter = func(pod *v1.Pod) bool {
for _, phase := range strategy.Params.PodLifeTime.PodStatusPhases {
if string(pod.Status.Phase) == phase {
return evictable.IsEvictable(pod)
}
}
return false
}
}
podFilter, err := podutil.NewOptions().
WithFilter(filter).
WithNamespaces(includedNamespaces).
WithoutNamespaces(excludedNamespaces).
WithLabelSelector(strategy.Params.LabelSelector).
BuildFilterFunc()
if err != nil {
klog.ErrorS(err, "Error initializing pod filter function")
return
}
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v", node.Name)
pods := listOldPodsOnNode(ctx, client, node, *strategy.Params.MaxPodLifeTimeSeconds, podEvictor)
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods := listOldPodsOnNode(node.Name, getPodsAssignedToNode, podFilter, *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds)
for _, pod := range pods {
success, err := podEvictor.EvictPod(ctx, pod, node, "PodLifeTime")
if success {
klog.V(1).Infof("Evicted pod: %#v because it was created more than %v seconds ago", pod.Name, *strategy.Params.MaxPodLifeTimeSeconds)
klog.V(1).InfoS("Evicted pod because it exceeded its lifetime", "pod", klog.KObj(pod), "maxPodLifeTime", *strategy.Params.PodLifeTime.MaxPodLifeTimeSeconds)
}
if err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
}
}
}
}
func listOldPodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, maxAge uint, evictor *evictions.PodEvictor) []*v1.Pod {
pods, err := podutil.ListPodsOnANode(ctx, client, node, evictor.IsEvictable)
func listOldPodsOnNode(
nodeName string,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
filter podutil.FilterFunc,
maxPodLifeTimeSeconds uint,
) []*v1.Pod {
pods, err := podutil.ListPodsOnANode(nodeName, getPodsAssignedToNode, filter)
if err != nil {
return nil
}
var oldPods []*v1.Pod
for _, pod := range pods {
podAgeSeconds := uint(v1meta.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds())
if podAgeSeconds > maxAge {
podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds())
if podAgeSeconds > maxPodLifeTimeSeconds {
oldPods = append(oldPods, pod)
}
}

Some files were not shown because too many files have changed in this diff Show More