1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 13:29:11 +01:00

Compare commits

...

439 Commits

Author SHA1 Message Date
Kubernetes Prow Robot
f79131d0af Merge pull request #745 from damemi/update-go-version-122
[release-1.22] Bump Go to 1.16.14
2022-03-03 01:40:49 -08:00
Mike Dame
ef85c0fff1 Bump Go to 1.16.14 2022-02-25 20:49:51 +00:00
Kubernetes Prow Robot
f5538db9d2 Merge pull request #634 from a7i/release-1.22.1
Release 1.22.1
2021-09-29 09:36:49 -07:00
Amir Alavi
12e6bf931d Update Docs, Manifests, and Helm Chart version to 0.22.1 2021-09-28 10:55:01 -04:00
Amir Alavi
41fca2ed61 e2e tests for RemoveFailedPods strategy
Fix priority class default
2021-09-28 10:54:50 -04:00
Amir Alavi
8e1da96082 e2e TestTopologySpreadConstraint: ensure pods are running before checking for topology spread across domains 2021-09-28 10:51:49 -04:00
Amir Alavi
27fec39c65 RemoveFailedPods: guard against nil descheduler strategy (e.g. in case of default that loads all strategies) 2021-09-28 10:51:49 -04:00
Kubernetes Prow Robot
6bfc76b6b2 Merge pull request #624 from damemi/release-1.22
[release-1.22] Update Helm chart version to 0.22.0
2021-09-08 13:43:59 -07:00
Mike Dame
9f1274f3ab Update Helm chart version to 0.22.0 2021-09-08 16:00:56 -04:00
Kubernetes Prow Robot
e6926e11ea Merge pull request #617 from KohlsTechnology/docs-1.22
Update Docs and Manifests for v0.22.0
2021-08-31 09:31:37 -07:00
Sean Malloy
16228c9dd1 Update Docs and Manifests for v0.22.0
* Added v0.22 references to README
* Update k8s manifests with v0.22.0 references
* Added table with list of supported architectures by release
2021-08-31 00:24:18 -05:00
Kubernetes Prow Robot
57dabbca5c Merge pull request #597 from a7i/amir/e2e-topology
Add e2e tests for TopologySpreadConstraint
2021-08-30 22:00:28 -07:00
Kubernetes Prow Robot
04439c6e64 Merge pull request #610 from a7i/failed-pods
Introduce RemoveFailedPods strategy
2021-08-30 11:30:09 -07:00
Amir Alavi
0e0e688fe8 Introduce RemoveFailedPods strategy 2021-08-30 14:17:52 -04:00
Kubernetes Prow Robot
0603de4353 Merge pull request #613 from derdanne/derdanne/highnodeutilization-readme-591
nodeutilization strategy: "numberOfNodes" should work as documented
2021-08-24 08:29:14 -07:00
Kubernetes Prow Robot
ea911db6dc Merge pull request #615 from KohlsTechnology/bump-1.22
Bump To k8s 1.22.0
2021-08-19 07:07:24 -07:00
Sean Malloy
c079c7aaae Bump To k8s 1.22.0 2021-08-19 00:40:38 -05:00
Kubernetes Prow Robot
5420988a28 Merge pull request #614 from KohlsTechnology/bump-go-version
Bump To Go 1.16.7
2021-08-18 02:10:08 -07:00
Sean Malloy
b56a1ab80a Bump To Go 1.16.7
The main k/k repo was updated to Go 1.16.7 for k8s
v1.22.0 release. See below PR for reference.

https://github.com/kubernetes/kubernetes/pull/104200
2021-08-17 23:47:45 -05:00
Daniel Klockenkämper
a6b34c1130 nodeutilization strategy: "numberOfNodes" should work as documented 2021-08-17 13:29:07 +00:00
Amir Alavi
0de8002b7d Update gce scripts to spread nodes over 2 zones 2021-08-16 22:41:37 -04:00
Amir Alavi
84d648ff60 Add e2e tests for TopologySpreadConstraint 2021-08-16 22:39:31 -04:00
Kubernetes Prow Robot
6ad6f6fce5 Merge pull request #592 from chrisjohnson00/issue-591
docs: adding clarification to HighNodeUtilization's purpose
2021-08-12 21:38:22 -07:00
Kubernetes Prow Robot
b7100ad871 Merge pull request #582 from praxist/helm-deployment
Update helm chart for running as deployment
2021-08-12 07:29:47 -07:00
Matthew Leung
38c0f1c639 Update helm chart for running as deployment 2021-08-11 11:56:14 -07:00
Kubernetes Prow Robot
64d7901d82 Merge pull request #602 from a7i/balance-domains-aboveavg
TopologySpreadConstraint: advance above avg index when at ideal average
2021-08-10 05:47:25 -07:00
Kubernetes Prow Robot
ab1015e5fa Merge pull request #599 from a7i/amir/update-gce-images
Update GCE images to Ubuntu 18.04 LTS
2021-08-10 05:47:18 -07:00
Kubernetes Prow Robot
1753bf4422 Merge pull request #611 from a7i/docs-remove-redundant-strategies
Update README to remove redundant list of strategies
2021-08-02 08:11:22 -07:00
Amir Alavi
7cb44dca27 Update README to remove redundant list of strategies 2021-07-30 16:35:00 -04:00
Amir Alavi
2ec4b8b674 Update GCE images to Ubuntu 18.04 LTS 2021-07-30 16:14:20 -04:00
Kubernetes Prow Robot
0d0633488d Merge pull request #598 from jayonlau/clenup
Clean up extra spaces
2021-07-30 11:11:37 -07:00
Kubernetes Prow Robot
f3b3853d9d Merge pull request #562 from wsscc2021/helm-chart-tolerations
Add tolerations to cronjob in helm chart
2021-07-30 10:43:38 -07:00
Kubernetes Prow Robot
e550e5e22a Merge pull request #600 from a7i/topology-example
Add example for RemovePodsViolatingTopologySpreadConstraint
2021-07-28 05:37:34 -07:00
Kubernetes Prow Robot
2bf37ff495 Merge pull request #608 from a7i/master
Remove kind binary from repo
2021-07-28 01:13:35 -07:00
Amir Alavi
fa84ec6774 Remove kind binary from repo and add to gitignore 2021-07-27 08:28:55 -04:00
Kubernetes Prow Robot
e18e0416b1 Merge pull request #607 from a7i/fix-helm-test
Place bash shebang at the top of the script + Ensure Helm installed for run-helm-tests
2021-07-27 00:40:45 -07:00
Amir Alavi
34282162f8 Wait for job to start/finish in helm-test script 2021-07-26 23:53:36 -04:00
Amir Alavi
7a043d31be Ensure helm is installed and move shebang to top of file 2021-07-26 16:16:20 -04:00
Amir Alavi
b0e5d64bd7 TopologySpreadConstraint: advance above avg index when at ideal average when balancing domains 2021-07-13 22:55:10 -04:00
Amir Alavi
1c9ac2daee Add example for RemovePodsViolatingTopologySpreadConstraint 2021-07-09 22:56:59 -04:00
jayonlau
c6b67e8a6f Clean up extra spaces
Clean up extra spaces, although these errors are not important, they affect the code specification.
2021-07-09 15:40:35 +08:00
Kubernetes Prow Robot
2e4873d103 Merge pull request #594 from ikarldasan/master-to-main
Rename master to main
2021-07-06 05:50:18 -07:00
Karl Dasan
3e483c4d85 Rename master to main 2021-07-01 13:48:50 +05:30
Kubernetes Prow Robot
032ea70380 Merge pull request #549 from pravarag/verify-defaulters-gen
Add verify scripts for defaulters generator
2021-06-30 04:05:03 -07:00
Pravar Agrawal
df84dc4548 add verify script for defaulters gen 2021-06-30 09:41:43 +05:30
Chris Johnson
d4fa83f8bc chore: PR feedback 2021-06-25 12:30:28 -07:00
Chris Johnson
448dbceadd docs: adding clarification to HighNodeUtilization's purpose 2021-06-24 16:20:21 -07:00
Kubernetes Prow Robot
b83b064992 Merge pull request #589 from a7i/remove-kind-bin
Remove kind binary from repo
2021-06-16 00:19:59 -07:00
Amir Alavi
133a0049e3 Remove kind binary from repo 2021-06-15 18:29:06 -04:00
Kubernetes Prow Robot
50b1c1337d Merge pull request #561 from mekza/support_image_pull_secrets
Add support for private registry creds
2021-06-10 23:28:58 -07:00
Martin-Zack Mekkaoui
d5deed44ca Add support for private registry creds 2021-06-09 22:11:37 +02:00
Kubernetes Prow Robot
0f785b9530 Merge pull request #584 from damemi/update-go-badge
Update Go report card badge
2021-06-08 10:48:08 -07:00
Mike Dame
eb1f0ecc14 Update Go report card badge 2021-06-08 13:39:53 -04:00
Kubernetes Prow Robot
b59995eeb8 Merge pull request #583 from ingvagabund/highnodeutil-nodefit
HighNodeUtilization: add NodeFit feature
2021-06-08 08:15:13 -07:00
Jan Chaloupka
d998d82357 HighNodeUtilization: add NodeFit feature 2021-06-08 16:59:43 +02:00
Kubernetes Prow Robot
f51ea72ce0 Merge pull request #577 from a7i/amira/cronjob-ga
Use stable batch/v1 API Group for Kubernetes 1.21
2021-06-08 06:03:13 -07:00
Kubernetes Prow Robot
fe8d4c0d21 Merge pull request #572 from audip/feature/add-deployment-k8s-yaml-files
Add run descheduler as deployment
2021-06-08 05:47:14 -07:00
Kubernetes Prow Robot
3843a2d5d1 Merge pull request #550 from hanumanthan/highnodeutilisation
Highnodeutilization strategy
2021-06-08 01:59:12 -07:00
Kubernetes Prow Robot
839a237d6a Merge pull request #581 from jsravn/patch-1
Remove namespace from ClusterRoleBinding
2021-06-07 07:16:40 -07:00
Hanu
4cd1e66ef3 Adding highnodeutilization strategy 2021-06-06 18:01:42 +08:00
Hanu
2f18864fa5 Refractor - Modify the common functions to be used by high utilisation 2021-06-06 18:00:43 +08:00
Hanu
6e71068f4f Refractoring lownodeutilization - extracting common functions 2021-06-06 18:00:29 +08:00
James Ravn
e40620effa Remove namespace from ClusterRoleBinding
It's not namespace scoped. This breaks some tools like kpt.
2021-06-04 11:04:14 +01:00
Kubernetes Prow Robot
d7dc0abd7b Merge pull request #576 from a7i/amira/topology-spread-label-filter
Filter pods by labelSelector during eviction for TopologySpreadConstraint strategy
2021-06-01 23:38:37 -07:00
Amir Alavi
012ca2398f Filter pods by labelSelector during eviction for TopologySpreadConstraint strategy 2021-06-01 15:42:23 -04:00
Amir Alavi
f07089d7b3 Bump Helm Chart, kind, and Kubernetes version for helm-test 2021-06-01 12:44:05 -04:00
Amir Alavi
a54b59f208 Use stable batch/v1 API Group for Kubernetes 1.21 2021-06-01 12:44:05 -04:00
Kubernetes Prow Robot
bfd5feaf60 Merge pull request #559 from RyanDevlin/nodeFit
Working nodeFit feature
2021-05-31 09:06:26 -07:00
RyanDevlin
41d46d0d3b Working nodeFit feature 2021-05-24 09:03:38 -04:00
Aditya Purandare
646c13ae15 Fix grammar and indentation issue for deployment resource 2021-05-21 12:42:51 -07:00
Kubernetes Prow Robot
3b9d3d9719 Merge pull request #563 from ingvagabund/removeduplicates-take-taints-into-account-for-node-count
RemoveDuplicates: take node taints, node affinity and node selector into account when computing a number of feasible nodes for the average occurence of pods per node
2021-05-21 05:38:46 -07:00
Aditya Purandare
449383caa3 Add run descheduler as deployment files and update README 2021-05-20 17:04:05 -07:00
Kubernetes Prow Robot
31fd097c0a Merge pull request #527 from pravarag/add-helm-test
Add helm test
2021-05-19 06:10:51 -07:00
Kubernetes Prow Robot
11143d5b2c Merge pull request #570 from KohlsTechnology/bump-kind-version
Bump kind version
2021-05-19 05:40:51 -07:00
Sean Malloy
8480e03e9c Fail unit and e2e tests on any errors 2021-05-19 00:35:16 -05:00
Sean Malloy
0397425010 Bump Kind To v0.11.0
This is required for running e2e tests for k8s v1.21.
2021-05-18 23:37:10 -05:00
Jan Chaloupka
5396282e3d RemoveDuplicates: take node taints, node affinity and node selector into account when computing a number of feasible nodes for the average occurence of pods per node
Nodes with taints which are not tolerated by evicted pods will never run the
pods. The same holds for node affinity and node selector.
So increase the number of pods per feasible nodes to decrease the
number of evicted pods.
2021-05-18 16:13:24 +02:00
Kubernetes Prow Robot
a9ff644de6 Merge pull request #568 from BinacsLee/binacs-pkg-descheduler-descheduler_test-fix-errorhandling
Add verify script for govet & fix pkg/descheduler/descheduler_test.go
2021-05-17 08:18:23 -07:00
BinacsLee
fe8e17f72c fix staticcheck failure for pkg/descheduler/descheduler_test.go 2021-05-17 23:07:12 +08:00
Kubernetes Prow Robot
a1709e9edd Merge pull request #567 from a7i/topology-taint-toleration
RemovePodsViolatingTopologySpreadConstraint : Take node's taints into consideration when balancing domains
2021-05-14 12:41:58 -07:00
Amir Alavi
24c0ca2ef9 Take node's taints into consideration when balancing domains 2021-05-14 15:23:58 -04:00
Kubernetes Prow Robot
9b26abd538 Merge pull request #565 from damemi/issue-564
Add test cases for soft constraints/multi constraints
2021-05-12 19:14:24 -07:00
Mike Dame
fc83c13166 Add test cases for soft constraints/multi constraints 2021-05-12 08:36:07 -04:00
Kubernetes Prow Robot
9b69962053 Merge pull request #535 from ingvagabund/e2e-refactor
E2e refactor
2021-05-11 06:33:36 -07:00
Jan Chaloupka
4edbecc85d Define NodeSelectorsEqual predicate 2021-05-09 18:08:32 +02:00
Jan Chaloupka
54f67266bb Define TolerationsEqual 2021-05-09 18:08:27 +02:00
Donggeun Lee
4ba48b018c Add tolerations to cronjob in helm chart 2021-05-04 19:56:08 +09:00
Kubernetes Prow Robot
2a3529c543 Merge pull request #560 from damemi/update-verify-messages
Update error messages in verify scripts to be more informative
2021-04-30 20:05:59 -07:00
Mike Dame
58408d710b Update error messages in verify scripts to be more informative 2021-04-30 16:56:16 -04:00
Kubernetes Prow Robot
161f66a12f Merge pull request #558 from KohlsTechnology/structured-logging
Use Structured Logging For Unknown Strategy Log Message
2021-04-28 23:28:50 -07:00
Sean Malloy
6bde95c9a1 Use Structured Logging For Unknown Strategy Log Message
Always use structured logging. Therefore update klog.Errorf() to instead
use klog.ErrorS().

Here is an example of the new log message.

E0428 23:58:57.048912 586 descheduler.go:145] "skipping strategy" err="unknown strategy name" strategy=ASDFPodLifeTime
2021-04-29 00:00:07 -05:00
Kubernetes Prow Robot
724ff8a188 Merge pull request #556 from damemi/change-main-loop
Invert main strategy loop for performance and customizability
2021-04-28 07:52:50 -07:00
Mike Dame
feae158a50 Invert main strategy loop for performance and customizability 2021-04-28 10:36:02 -04:00
Kubernetes Prow Robot
780ac7a51e Merge pull request #554 from BinacsLee/binacs-utils-predicates-cleanup
code cleanup: remove check on length
2021-04-26 07:19:01 -07:00
BinacsLee
c4afb6bb30 code cleanup: remove check on length 2021-04-25 21:44:20 +08:00
Pravar Agrawal
8b5c4e805d update docs with helm test info 2021-04-15 15:56:42 +05:30
Jan Chaloupka
f4e24a408f Drop klog 2021-04-14 09:03:38 +02:00
Jan Chaloupka
2781106d49 TestEvictAnnotation: replace LowNodeUtilization strategy with PodLifetime
PodLifetime is simpler in validating results
2021-04-14 09:03:31 +02:00
Jan Chaloupka
534a30a058 e2e: deleteRC: replace loop with wait.PollImmediate 2021-04-14 09:03:26 +02:00
Jan Chaloupka
bb55741320 Update vendor 2021-04-14 09:03:21 +02:00
Jan Chaloupka
079bd6157b e2e: TestLowNodeUtilization: normalize nodes before running the strategy 2021-04-14 09:03:15 +02:00
Pravar Agrawal
92cb1b23ed add helm-test configurations 2021-04-14 10:29:31 +05:30
Kubernetes Prow Robot
832facc526 Merge pull request #537 from KohlsTechnology/docs-0.21.0
Update Docs and Manifests for v0.21.0
2021-04-13 08:24:52 -07:00
Kubernetes Prow Robot
c4fa6c472f Merge pull request #548 from lx1036/feature/fix-testcase
Updating policy api version used in pod evictor
2021-04-13 08:12:50 -07:00
Xiang Liu
a848dac3cf Updating policy api version used in pod evictor 2021-04-13 11:02:01 +08:00
Kubernetes Prow Robot
43a2ccf9c4 Merge pull request #546 from ingvagabund/add-diagram
Add diagram of strategies
2021-04-12 13:38:15 -07:00
Jan Chaloupka
60cf3aeb95 Add diagram of strategies 2021-04-12 09:51:54 +02:00
Kubernetes Prow Robot
84b174e841 Merge pull request #547 from KohlsTechnology/bump-1.21.0
Bump To k8s 1.21.0
2021-04-08 23:00:48 -07:00
Sean Malloy
40337d064d Bump To k8s 1.21.0 2021-04-08 23:36:06 -05:00
Kubernetes Prow Robot
9fe585c854 Merge pull request #545 from pravarag/add-verify-script-deep-copies
Add verify script for deep-copies generator
2021-04-08 21:20:35 -07:00
Pravar Agrawal
4fce2ca2f1 add verify script for deep-copies gen 2021-04-08 22:51:24 +05:30
Kubernetes Prow Robot
4c11de0403 Merge pull request #507 from pravarag/add-verify-scripts
Add verify scripts for make gen to run during PR
2021-04-07 21:41:41 -07:00
Pravar Agrawal
a9099efc45 add verify scripts for conversions gen
Signed-off-by: Pravar Agrawal <pravaag1@in.ibm.com>
2021-04-08 09:56:32 +05:30
Kubernetes Prow Robot
6edb644f2e Merge pull request #544 from ingvagabund/lnu-improve-node-usage-logging
LNU: improve nodeUsage logging
2021-04-06 23:45:53 -07:00
Jan Chaloupka
c239e1199f LNU: improve nodeUsage logging
To avoid:
```
I0210 11:56:04.137956 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]}
I0210 11:56:04.138829 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]}
I0210 11:56:04.139044 3309277 lownodeutilization.go:389] "Updated node usage" updatedUsage={node:0xc000460000 usage:map[cpu:0xc00042b480 memory:0xc00042b4c0 pods:0xc00042b500] allPods:[0xc0004a0000 0xc0004a03e8 0xc0004a07d0 0xc0004a0bb8 0xc0004a0fa0 0xc0004a1388 0xc0004a1770 0xc0004a1b58] lowResourceThreshold:map[cpu:0xc00042b540 memory:0xc00042b580 pods:0xc00042b5c0] highResourceThreshold:map[cpu:0xc00042b600 memory:0xc00042b640 pods:0xc00042b680]}
```
2021-04-06 09:43:23 +02:00
Kubernetes Prow Robot
b713b7852a Merge pull request #434 from ZongqiangZhang/extend-resources
Support extended resources in LowNodeUtilization
2021-04-06 00:21:34 -07:00
Kubernetes Prow Robot
5d07d0c8e2 Merge pull request #543 from gaurav1086/e2e_test_fix_goroutine_leak
e2e_test: fix goroutine leak
2021-04-05 18:11:19 -07:00
Gaurav Singh
7076ba0760 e2e_test: fix goroutine leak 2021-04-04 21:19:56 -04:00
ZongqiangZhang
81b816d4a4 support extended resources in lownodeutilization 2021-04-02 21:37:51 +08:00
Kubernetes Prow Robot
9ebc909c7f Merge pull request #541 from KohlsTechnology/make-gen
Update Generated Code
2021-04-01 00:39:22 -07:00
Sean Malloy
af01b675b0 Update Generated Code
Ran "make gen" using Go 1.16.1. Some changes were merged, but "make gen"
was not run. This fixes the problem.

See below PR for reference:
https://github.com/kubernetes-sigs/descheduler/pull/523
2021-04-01 00:28:20 -05:00
Kubernetes Prow Robot
ce6ce5a058 Merge pull request #539 from damemi/1.21-rc.0
Bump to k8s 1.21-rc.0
2021-03-31 16:55:21 -07:00
Mike Dame
bd4f6d4fcd Bump to k8s 1.21-rc.0 2021-03-31 10:22:56 -04:00
Sean Malloy
6a4181158a Update Docs and Manifests for v0.21.0
* Added v0.21 references to README
* Update k8s manifests with v0.21.0 references
* Added table with list of supported architectures by release
2021-03-31 00:53:19 -05:00
Kubernetes Prow Robot
a2746d09e8 Merge pull request #523 from RyanDevlin/evict-critical
Added EvictSystemCriticalPods flag to descheduler
2021-03-30 10:41:57 -07:00
RyanDevlin
b5d7219391 Completed evictSystemCriticalPods feature 2021-03-29 23:13:05 -04:00
Kubernetes Prow Robot
b09d5d99dc Merge pull request #534 from KohlsTechnology/kustomize-docs
Use Tags In Kustomize Documentation
2021-03-27 23:46:44 -07:00
Sean Malloy
dbcc20f37f Use Tags In Kustomize Documentation
The master branch always represents the next release of the
descheduler. Therefore applying the descheduler k8s manifests
from the master branch is not considered stable. It is best for
users to install descheduler using the released tags.
2021-03-27 01:21:08 -05:00
Kubernetes Prow Robot
51340b56b8 Merge pull request #533 from ingvagabund/bump-to-go1.16
Bump go to 1.16
2021-03-26 23:20:43 -07:00
Jan Chaloupka
160669817e Bump go to 1.16 2021-03-25 10:13:40 +01:00
Kubernetes Prow Robot
6ca4479892 Merge pull request #520 from KohlsTechnology/statefulset-docs
Document That Descheduler Considers StatefulSets For Eviction
2021-03-23 08:15:36 -07:00
Sean Malloy
92740a25d4 Add Initial Unit Tests For StatefulSets 2021-03-23 09:24:08 -05:00
Sean Malloy
56e4daccaf Document That Descheduler Considers StatefulSets For Eviction
Similar to ReplicaSet, ReplicationController, and Jobs pods with a
StatefulSet metadata.ownerReference are considered for eviction.
Document this, so that it is clear to end users.
2021-03-23 09:24:08 -05:00
Kubernetes Prow Robot
b546832b66 Merge pull request #526 from KohlsTechnology/chart-docs-fix
Correct Helm Chart Docs For Container Requests and Limits
2021-03-15 08:59:05 -07:00
Kubernetes Prow Robot
39e5b34af3 Merge pull request #525 from damemi/topology-spread-selector-fix
(TopologySpread) Evict pods with selectors that match multiple nodes
2021-03-14 01:35:04 -08:00
Sean Malloy
e699e08d13 Correct Helm Chart Docs For Container Requests and Limits
The resources.cpuRequest and resources.memoryRequest varialbes are
not valid in the helm chart values.yaml file. The correct varialbe
name for setting the requests and limits is resources.

Also, fixed white space alignment in the markdown table.
2021-03-12 23:17:02 -06:00
Mike Dame
af26b57e5e (TopologySpread) Evict pods with selectors that match multiple nodes 2021-03-12 13:41:17 -05:00
Kubernetes Prow Robot
22fe589ae6 Merge pull request #508 from KohlsTechnology/emeritus-approvers
Move Inactive Maintainers to Emeritus Status
2021-03-12 09:06:18 -08:00
Kubernetes Prow Robot
0a11b5a138 Merge pull request #521 from KohlsTechnology/armv7
Enable ARM32v7 Container Image Builds
2021-03-09 17:07:13 -08:00
Sean Malloy
363f02710b Enable ARM32v7 Container Image Builds
Add ARM32v7 in addition to the currently supported architectures. This
will allow running descheduler on Raspberry Pi v3 devices.
2021-03-09 09:03:58 -06:00
Kubernetes Prow Robot
6abfa232e7 Merge pull request #518 from damemi/sigs-k8s-mdtoc
Add table of contents generator/verify script
2021-03-08 12:06:59 -08:00
Mike Dame
bbfb12a120 Run hack/update-toc.sh 2021-03-08 14:39:08 -05:00
Mike Dame
5df2a0c516 Add hack scripts and makefile targets 2021-03-08 14:39:08 -05:00
Mike Dame
8ecd14289a Add <toc> markers to README 2021-03-08 10:51:55 -05:00
Mike Dame
131ed42a4c Add sigs.k8s.io/mdtoc dependency 2021-03-08 10:46:27 -05:00
Kubernetes Prow Robot
6b8d4cd5a7 Merge pull request #517 from lixiang233/fix_topology_log
Correct log in topology spread strategy
2021-03-07 11:31:42 -08:00
Kubernetes Prow Robot
24a06511a2 Merge pull request #505 from ingvagabund/collect-metrics
Collect metrics
2021-03-06 21:41:42 -08:00
lixiang
09c7d1be0a Correct log in topology spread strategy 2021-03-06 16:21:46 +08:00
Kubernetes Prow Robot
f5666882de Merge pull request #515 from damemi/clarify-pod-percentage-readme
Clarify resource percentage calculation in README
2021-03-05 07:36:22 -08:00
Jan Chaloupka
701f22404b Serve secure metrics at 10258 2021-03-05 16:35:45 +01:00
Mike Dame
d5fa60bdd5 Clarify resource percentage calculation in README 2021-03-05 09:34:04 -05:00
Kubernetes Prow Robot
9e14f733b7 Merge pull request #510 from lixiang233/Ft_filter_by_label
Filter pods by labelSelector
2021-03-05 01:14:22 -08:00
lixiang
29ade13ce7 README and e2e-testcase add for labelSelector 2021-03-04 21:32:14 +08:00
lixiang
03518badb8 Strategies: Add labelSelector to all strategies except LowNodeUtilization, RemoveDuplicates and RemovePodsViolatingTopologySpreadConstraint. 2021-03-04 21:30:50 +08:00
Jan Chaloupka
24458fb0ca Increase pods_evicted metric 2021-03-03 16:15:21 +01:00
Jan Chaloupka
1c5b32763b Register metrics
New metrics:
- build_info: Build info about descheduler, including Go version, Descheduler version, Git SHA, Git branch
- pods_evicted: Number of successfully evicted pods, by the result, by the strategy, by the namespace
2021-03-03 16:15:09 +01:00
Jan Chaloupka
3bd031bbb3 Move build's versioning bits under pkg/version 2021-03-03 15:56:53 +01:00
Jan Chaloupka
ea6e9f22b9 Vendor metrics 2021-03-03 15:56:48 +01:00
Kubernetes Prow Robot
73309a3948 Merge pull request #490 from damemi/logging-format-default
Add default logging-format value
2021-03-03 06:49:20 -08:00
Kubernetes Prow Robot
105313a0e3 Merge pull request #512 from ingvagabund/bump-gogo-protobuf-to-1.3.2
Bump github.com/gogo/protobuf to v1.3.2
2021-03-02 07:05:19 -08:00
Jan Chaloupka
d368cbed32 Bump github.com/gogo/protobuf to v1.3.2
Fixes https://cve.mitre.org/cgi-bin/cvename.cgi?name=CVE-2021-3121
2021-03-02 14:48:32 +01:00
Sean Malloy
5f4dfbc922 Update Maintainer Field in Dockerfiles
Set the maintainer field to use the more generic SIG scheduling name and
email address.
2021-03-01 23:19:42 -06:00
Sean Malloy
967911e4c1 Move Inactive Maintainers to Emeritus Status
For roughly the past year damemi has been the only active approver for
the descheduler. Therefore move the inactive approvers to emeritus
status. This will help clarify to contributors who should be assigned to
pull requests.
2021-03-01 09:23:49 -06:00
Kubernetes Prow Robot
726091712d Merge pull request #509 from KohlsTechnology/update-reviwers
Add lixiang233 As A Reviewer
2021-03-01 06:45:25 -08:00
lixiang
854afa7c73 PodsListing: Add WithLabelSelector option. 2021-03-01 11:13:44 +08:00
lixiang
2517268b1f API: Add a new parameter labelSelector to StrategyParameters. 2021-02-26 15:49:00 +08:00
Sean Malloy
fea8beabab Add lixiang233 As A Reviewer 2021-02-25 22:03:50 -06:00
Kubernetes Prow Robot
7f9c95fa16 Merge pull request #506 from KohlsTechnology/kind-0.10.0
Update e2e tests to use kind v0.10.0
2021-02-25 07:29:27 -08:00
Sean Malloy
6f7e2a271e Update e2e tests to use kind v0.10.0 2021-02-25 00:17:06 -06:00
Kubernetes Prow Robot
217ebdfa73 Merge pull request #504 from lixiang233/Fix_lowNodeUtilization_log
Log and README optimization for LowNodeUtilization
2021-02-24 05:22:50 -08:00
lixiang
e014fda58e Log and README optimization for LowNodeUtilization 2021-02-24 09:17:07 +08:00
Kubernetes Prow Robot
6d0360fd16 Merge pull request #502 from damemi/node-affinity-totalevicted
Move 'total pods evicted' log message to main loop
2021-02-19 13:45:41 -08:00
Mike Dame
01a87b6143 Move 'total pods evicted' log message to main loop 2021-02-19 10:37:43 -05:00
Kubernetes Prow Robot
f3e871492c Merge pull request #499 from KohlsTechnology/go-1.15.8
Bump To Go 1.15.8
2021-02-18 05:52:57 -08:00
Kubernetes Prow Robot
bf8d744686 Merge pull request #498 from kristinnardal2/patch-1
Fix indentation in values.yaml
2021-02-18 05:52:51 -08:00
Kubernetes Prow Robot
477d12968e Merge pull request #496 from KohlsTechnology/nonroot-helm-chart
Update Helm Chart to run as non-root
2021-02-18 00:34:51 -08:00
Sean Malloy
dcb4136a96 Bump To Go 1.15.8
The main k/k repo was updated to Go 1.15.8 for the upcoming k8s v1.21.0
release. See below PR for reference.

https://github.com/kubernetes/release/issues/1895
2021-02-18 00:00:31 -06:00
Kristinn Björgvin Árdal
2c0afafccf Fix indentation in values.yaml
A couple of lines had an extra whitespace.
2021-02-17 08:51:26 +01:00
Kubernetes Prow Robot
e56617253f Merge pull request #497 from ingvagabund/resourceUsagePercentages-unit-test
LowNodeUtilization: unit test resourceUsagePercentages to validate percentages are computed correctly
2021-02-11 20:26:48 -08:00
Jan Chaloupka
500aaea4dd LowNodeUtilization: unit test resourceUsagePercentages to validate percentages are computed correctly 2021-02-11 16:04:37 +01:00
Sean Malloy
4444811f26 Update Helm Chart to run as non-root 2021-02-10 21:49:01 -06:00
Kubernetes Prow Robot
b63c6fac27 Merge pull request #495 from KohlsTechnology/nonroot
Update Job and CronJob YAML to run as non-root
2021-02-10 08:22:59 -08:00
Sean Malloy
dfc76906d4 Update Job and CronJob YAML to run as non-root 2021-02-09 23:10:18 -06:00
Kubernetes Prow Robot
fbd17d4caf Merge pull request #480 from damemi/nodeselector-cronjob
Add nodeSelector to cronJob helm chart
2021-02-09 10:32:59 -08:00
Mike Dame
9c01589fb9 Add nodeSelector to cronJob helm chart 2021-02-09 13:27:26 -05:00
Kubernetes Prow Robot
a0942afaa1 Merge pull request #493 from fancc/resource_usage_percentage
Modify resourceUsage.Value() to resourceUsage.MilliValue()
2021-02-02 03:38:28 -08:00
范成城
16fa21a4a6 change resourceUsagePercentage func 2021-01-31 20:46:46 +08:00
Kubernetes Prow Robot
241f1325c9 Merge pull request #481 from damemi/ignore-pvc-pods
Add option to ignore pods with PVCs from eviction
2021-01-26 22:41:40 -08:00
Mike Dame
c1a63a557a Add option to ignore pods with PVCs from eviction 2021-01-26 08:47:54 -05:00
Mike Dame
e45e21368a Add default logging-format value 2021-01-25 14:19:58 -05:00
Kubernetes Prow Robot
f24b367479 Merge pull request #474 from lixiang233/Ft_include_soft_constraints
Add a parameter to include soft topology spread constraints
2021-01-21 09:21:06 -08:00
lixiang
8ba9cb1df7 Add a parameter to include soft topology spread constraints 2021-01-21 11:33:16 +08:00
Kubernetes Prow Robot
d502f05910 Merge pull request #484 from damemi/topology-spread-bug
Fix TopologySpread bug that evicts non-evictable pods
2021-01-12 17:26:36 -08:00
Mike Dame
241f47d947 Fix TopologySpread bug that evicts non-evictable pods 2021-01-12 15:25:55 -05:00
Kubernetes Prow Robot
19424f4119 Merge pull request #482 from damemi/fix-logging-gen
Add Logging field to v1alpha1 componentconfig
2021-01-11 09:44:25 -08:00
Mike Dame
635a40f305 Add Logging field to v1alpha1 componentconfig 2021-01-11 11:21:32 -05:00
Kubernetes Prow Robot
1804d2e3a2 Merge pull request #478 from ryuichi1208/master
Update README.md
2021-01-10 11:43:05 -08:00
Ryuichi Watanabe
fea4870243 Update README.md 2021-01-01 18:45:00 +09:00
Kubernetes Prow Robot
f54df67d11 Merge pull request #471 from Mathew857/master
refactor: remove unused code
2020-12-15 17:20:20 -08:00
wu.chaozong
1a998037f8 refactor: remove unuse param in e2e_test.go 2020-12-16 08:46:40 +08:00
wu.chaozong
c481877c03 refactor: update node_test file 2020-12-15 21:15:33 +08:00
Kubernetes Prow Robot
67df39690b Merge pull request #468 from eatwithforks/jylee/user
Run as user 1000 so pod can be runAsNonRoot for security purposes
2020-12-14 06:43:28 -08:00
wu.chaozong
674f14da78 refactor: remove unused code 2020-12-13 00:09:07 +08:00
Jye Lee
0cfbdf642b Run as user 1000 so pod can be runAsNonRoot for security purposes 2020-12-10 09:37:49 -08:00
Kubernetes Prow Robot
c86d1c7eb2 Merge pull request #463 from ingvagabund/duplicates-uniformly
RemoveDuplicatePods: evict uniformly
2020-12-10 07:14:13 -08:00
Jan Chaloupka
f67c265533 RemoveDuplicatePods: evict uniformly 2020-12-10 15:33:04 +01:00
Kubernetes Prow Robot
969921640c Merge pull request #465 from damemi/skip-balanced-topology
Skip topology calculations if domains are already balanced
2020-12-09 19:48:53 -08:00
Mike Dame
0273fd7597 Skip topology calculations if domains are already balanced 2020-12-09 11:29:59 -05:00
Kubernetes Prow Robot
e84d0c5587 Merge pull request #464 from damemi/1.20-bump
Bump to v0.20.0
2020-12-09 07:12:50 -08:00
Kubernetes Prow Robot
5267ec407c Merge pull request #462 from KohlsTechnology/docs-v0.20.0
Update Docs and Manifests for v0.20.0
2020-12-09 07:04:50 -08:00
Mike Dame
6714d8e0b7 Bump to v0.20.0 2020-12-09 09:53:03 -05:00
Sean Malloy
b3439eab41 Update Docs and Manifests for v0.20.0
* Added v0.20 references to README
* Update k8s manifests with v0.20.0 references
2020-12-09 00:10:50 -06:00
Kubernetes Prow Robot
509118587a Merge pull request #453 from dieterdemeyer/master
Allow setting options successfulJobsHistoryLimit and failedJobsHistoryLimit for cronjob
2020-12-08 14:33:33 -08:00
Kubernetes Prow Robot
f482537dff Merge pull request #455 from AmoVanB/fix/cluster-role-tsc
Helm: allow the topologySpreadConstraint strategy to list namespaces
2020-12-08 11:17:35 -08:00
Amaury Van Bemten
0f95817746 add permission to list namespaces for the topologyspread constraint strategy 2020-12-08 17:11:34 +01:00
Kubernetes Prow Robot
70d1fadae7 Merge pull request #454 from damemi/topology-spread-nsfix
Fix broken namespace logic in TopologySpreadConstraint
2020-12-04 09:34:00 -08:00
Mike Dame
499beb2fd7 Fix broken namespace logic in TopologySpreadConstraint 2020-12-04 10:49:58 -05:00
Kubernetes Prow Robot
de24f3854b Merge pull request #449 from KohlsTechnology/arm-image
Enable Multi-Arch Container Image Builds
2020-12-03 17:55:25 -08:00
Kubernetes Prow Robot
a5e8ba1a70 Merge pull request #452 from KohlsTechnology/go-1.15.5
Bump To Go 1.15.5
2020-12-03 06:21:00 -08:00
Dieter De Meyer
45ad48042f Allow setting options successfulJobsHistoryLimit and failedJobsHistoryLimit for cronjob 2020-12-03 11:56:44 +01:00
Sean Malloy
550de966c7 Bump To Go 1.15.5
The main k/k repo as updated to Go 1.15.5 for the upcoming k8s v1.20.0
release. See below PR for reference.

https://github.com/kubernetes/kubernetes/pull/95776
2020-12-03 01:16:42 -06:00
Kubernetes Prow Robot
c94342db31 Merge pull request #451 from damemi/v0.20.0-rc.0
bump k8s dependencies to 1.20-rc.0
2020-12-02 23:13:00 -08:00
Sean Malloy
94f1c7dd8d Document Multi-Arch Container Image Usage 2020-12-03 00:44:19 -06:00
Sean Malloy
bf91e6790e Enable Multi-Arch Container Image Builds
Previous to this change official descheduler container images only
supported the AMD64 hardware architecture. This change enables
building official descheduler container images for multiple
architectures.

The initially supported architectures are AMD64 and ARM64.
2020-12-03 00:06:22 -06:00
Mike Dame
251f44e568 bump(*): k8s to 1.20-rc.0 2020-12-02 14:40:50 -05:00
Kubernetes Prow Robot
922c4f6a63 Merge pull request #448 from damemi/topology-spread-logging
Add more topology spread logs
2020-12-01 15:10:50 -08:00
Mike Dame
2b5ec01381 Add more topology spread logs 2020-12-01 17:02:46 -05:00
Kubernetes Prow Robot
7bcd562ff5 Merge pull request #444 from KohlsTechnology/issue-432-cronjob
Update Helm Chart To Allow Setting startingDeadlineSeconds
2020-11-24 12:10:40 -08:00
Sean Malloy
03852d0914 Update Helm Chart To Allow Setting startingDeadlineSeconds
Allow end users to optionally set the descheduler CronJob
.spec.startingDeadlineSeconds when installing using helm.
2020-11-24 10:58:16 -06:00
Kubernetes Prow Robot
5f1e9a97c4 Merge pull request #446 from ingvagabund/low-node-util-percentages-times-100
LowNodeUtilization: express usagePercentage multiplied by 100
2020-11-24 08:13:02 -08:00
Jan Chaloupka
cd6f2cd4cb LowNodeUtilization: express usagePercentage multiplied by 100
Entry params are in interval <0; 100>. Have logs respect that as well.
2020-11-24 14:48:48 +01:00
Kubernetes Prow Robot
e679c7fabc Merge pull request #442 from KohlsTechnology/issue-432
Set Container Resources In YAML Manifests
2020-11-23 08:49:34 -08:00
Kubernetes Prow Robot
6f5918d765 Merge pull request #443 from KohlsTechnology/issue-432-helm
Set Container Resources In Helm Chart
2020-11-23 07:47:33 -08:00
Sean Malloy
5a46ba0630 Set Container Resources In Helm Chart
Prior to this commit the helm chart used to install the descheduler
CronJob did not set container requests or limits. This is considered
an anti-pattern when deploying applications on k8s.

Set descheduler container resources to make it a burstable pod. This
will ensure a high quality experience for end users when deploying
descheduler into their clusters using the instructions from the README.

The default values choosen for CPU/Memory are not based on any real data.
2020-11-23 09:12:54 -06:00
Kubernetes Prow Robot
c1323719f4 Merge pull request #427 from KohlsTechnology/fix-version-output
Fix Version Output For Automated Container Builds
2020-11-21 03:19:33 -08:00
Sean Malloy
8795fe6b90 Fix Version Output For Automated Container Builds
Prior to this change the output from the command "descheduler version"
when run using the official container images from k8s.gcr.io would
always output an empty string. See below for an example.

```
docker run k8s.gcr.io/descheduler/descheduler:v0.19.0 /bin/descheduler version
Descheduler version {Major: Minor: GitCommit: GitVersion: BuildDate:2020-09-01T16:43:23+0000 GoVersion:go1.15 Compiler:gc Platform:linux/amd64}
```

This change makes it possible to pass the descheduler version
information to the automated container image build process and
also makes it work for local builds too.
2020-11-20 23:25:22 -06:00
Sean Malloy
a3f8bb0369 Set Container Resources In YAML Manifests
Prior to this commit the YAML manifests used to install the descheduler
Job and CronJob did not set container requests or limits. This is
considered an anti-pattern when deploying applications on k8s.

Set descheduler container resources to make it a burstable pod. This
will ensure a high quality experience for end users when deploying
descheduler into their clusters using the instructions from the README.

The values choosen for CPU/Memory are not based on any real data.
2020-11-20 12:55:10 -06:00
Kubernetes Prow Robot
cd3c3bf4da Merge pull request #441 from ingvagabund/low-node-util-dump-node-usage-in-percentage
LowNodeUtilization: show node usage in percentage as well
2020-11-18 08:44:06 -08:00
Jan Chaloupka
652ee87bf5 LowNodeUtilization: show node usage in percentage as well 2020-11-18 14:50:53 +01:00
Kubernetes Prow Robot
5225ec4597 Merge pull request #436 from stevehipwell/helm-chart-rename
Rename Helm chart
2020-11-17 22:42:05 -08:00
Kubernetes Prow Robot
b2af720ddb Merge pull request #438 from lixiang233/add_missing_parameter
Add missing parameter in README
2020-11-17 20:24:04 -08:00
lixiang
94f07996f7 add missing parameter in README 2020-11-17 16:10:02 +08:00
Kubernetes Prow Robot
4839d5f369 Merge pull request #413 from damemi/podtopologyspread
Add PodTopologySpread strategy
2020-11-13 10:41:05 -08:00
Steve Hipwell
022e07c278 Rename helm chart 2020-11-09 10:21:48 +00:00
Kubernetes Prow Robot
620d71abdf Merge pull request #433 from ankon/patch-1
Fix trivial typo in helm README
2020-11-03 08:50:04 -08:00
Andreas Kohn
85d00ab457 Fix trivial typo in helm README 2020-11-03 13:10:37 +01:00
Kubernetes Prow Robot
b30bd40860 Merge pull request #428 from damemi/component-helper-nodeselector
Start using helpers from k8s.io/component-helpers
2020-11-02 04:52:54 -08:00
Mayank Kumar
4108362158 Add RemovePodsViolatingTopologySpreadConstraint strategy
This adds a strategy to balance pod topology domains based on the scheduler's
PodTopologySpread constraints. It attempts to find the minimum number of pods
that should be sent for eviction by comparing the largest domains in a topology
with the smallest domains in that topology.
2020-10-30 16:36:33 -04:00
Mike Dame
b27dc5f14e go mod tidy && go mod vendor 2020-10-30 09:41:49 -04:00
Mike Dame
3c54378749 Use NodeSelectorRequirementsAsSelector from k8s.io/component-helpers 2020-10-30 09:41:49 -04:00
Kubernetes Prow Robot
6240aa68f7 Merge pull request #430 from damemi/verify-spelling
Add hack/verify-spelling.sh script and Makefile target
2020-10-22 08:27:36 -07:00
Mike Dame
301af7fd9c Add hack/verify-spelling.sh script and Makefile target 2020-10-22 10:20:53 -04:00
Kubernetes Prow Robot
41d529ebe2 Merge pull request #429 from invidian/invidian/fix-typo
pkg/descheduler/strategies: fix typo
2020-10-22 06:59:36 -07:00
Mateusz Gozdek
cc6bb633ba pkg/descheduler/pod: fix typo
Signed-off-by: Mateusz Gozdek <mgozdekof@gmail.com>
2020-10-22 09:25:55 +02:00
Mateusz Gozdek
31cf70c34c pkg/descheduler/strategies: fix typo
Signed-off-by: Mateusz Gozdek <mgozdekof@gmail.com>
2020-10-22 09:25:24 +02:00
Kubernetes Prow Robot
3399619395 Merge pull request #418 from invidian/psp
charts/descheduler: add PodSecurityPolicy support
2020-10-21 23:17:35 -07:00
Kubernetes Prow Robot
cfc4cce08b Merge pull request #421 from m3y/kustomize
Support for remote resources in kustomize
2020-10-21 01:02:20 -07:00
Mateusz Gozdek
f9e9f0654a charts/descheduler: add PodSecurityPolicy support
This commit adds restrictive PodSecurityPolicy, which can be
optionally created, so descheduler can be deployed on clusters with
PodSecurityPolicy admission controller, but which do not ship default
policies.

Signed-off-by: Mateusz Gozdek <mgozdekof@gmail.com>
2020-10-21 08:41:39 +02:00
m3y
73af0e84fa Support for remote resources in kustomize 2020-10-20 03:20:11 +09:00
Kubernetes Prow Robot
b33928ac91 Merge pull request #412 from farah/logging-format
Add logging-format flag
2020-10-12 10:58:48 -07:00
Ali Farah
3ac0c408de Add --logging-format flag
Add k8s.io/component-base/config package
2020-10-12 22:27:39 +11:00
Kubernetes Prow Robot
149f900811 Merge pull request #417 from plutzilla/master
Helm chart README fix
2020-10-07 22:46:15 -07:00
Paulius Leščinskas
9ede04ba9b Update README.md
`kube-system` namespace provided in the installation instruction.
2020-10-02 20:07:24 +03:00
Paulius Leščinskas
f9cbed8b71 Helm chart README fix
Helm 3 omits `--name` flag for release name.
2020-10-02 18:11:06 +03:00
Kubernetes Prow Robot
fa4da031e4 Merge pull request #416 from KohlsTechnology/go-1.15.2
Bump To Go 1.15.2
2020-10-02 07:09:20 -07:00
Sean Malloy
9511f308d0 Bump To Go 1.15.2 2020-10-02 01:02:36 -05:00
Kubernetes Prow Robot
52f43f0fcb Merge pull request #414 from KohlsTechnology/event-recorder-structured-logs
Update Event Logging to Use Structured Logging
2020-10-01 14:25:19 -07:00
Sean Malloy
4bb0ceeed5 Update Event Logging to Use Structured Logging 2020-10-01 00:50:37 -05:00
Kubernetes Prow Robot
279f648e9a Merge pull request #410 from lixiang233/Add_lowNodeUtilization_usecase
Add use case for lowNodeUtilization
2020-09-28 20:57:25 -07:00
Kubernetes Prow Robot
411ec740ff Merge pull request #411 from KohlsTechnology/klogv2
Convert Last Log Message To Structured Logging
2020-09-28 06:14:48 -07:00
Sean Malloy
6237ba5a43 Convert Last Log Message To Structured Logging
The k8s.io/klog/v2 package does not currently support structured logging
for warning level log messages. Therefore update the one call in the
code base using klog.Warningf to instead use klog.InfoS.
2020-09-25 22:58:28 -05:00
Kubernetes Prow Robot
5d65a9ad68 Merge pull request #409 from ingvagabund/flip-some-klog-info-to-error
Change klog info messages after a strategy is exited into error messages
2020-09-25 20:34:47 -07:00
Kubernetes Prow Robot
28f3f867c3 Merge pull request #407 from ingvagabund/structured-klog
Flip Info/Infof/Error to InfoS/ErrorS
2020-09-24 04:22:05 -07:00
lixiang
00f79aa28d add use case for lowNodeUtilization 2020-09-24 17:51:08 +08:00
lixiang
6042d717e9 delete disabled strategies in podLifeTime use case 2020-09-24 17:49:02 +08:00
Jan Chaloupka
7afa54519f Change klog info messages after a strategy is exited into error messages 2020-09-24 11:07:20 +02:00
Kubernetes Prow Robot
8c3a80fbf9 Merge pull request #406 from damemi/duplicates-namespace-filtering
Add Namespace filtering to RemoveDuplicates strategy
2020-09-22 18:16:08 -07:00
Mike Dame
11b9829885 Update README to include strategy params 2020-09-22 10:28:43 -04:00
Jan Chaloupka
4798559545 Flip Info/Infof/Error to InfoS/ErrorS 2020-09-21 09:08:11 +02:00
Mike Dame
8b34d6eb94 Add Namespace filtering to RemoveDuplicates strategy 2020-09-18 12:17:47 -04:00
Kubernetes Prow Robot
70700a1c97 Merge pull request #401 from KohlsTechnology/bump-kind
Update e2e tests to use kind v0.9.0
2020-09-15 09:58:07 -07:00
Sean Malloy
d7420eb945 Update e2e tests to use kind v0.9.0 2020-09-15 09:11:36 -05:00
Kubernetes Prow Robot
c9cfeb35c2 Merge pull request #384 from ingvagabund/refactor-low-node-utilization
Refactor low node utilization
2020-09-13 19:04:58 -07:00
Kubernetes Prow Robot
fda63a816f Merge pull request #397 from farah/farah/add-structured-logging
Convert logs to use structured logs
2020-09-13 18:46:57 -07:00
Ali Farah
6329b6c27b Convert logs to use structured logs 2020-09-12 14:46:16 +10:00
Jan Chaloupka
9b4f781c5c Be verbose about unschedulable nodes which are not considered as underutilized 2020-09-11 12:57:24 +02:00
Jan Chaloupka
63039fcfd6 Compute utilization absolutely, not relatively 2020-09-11 12:57:22 +02:00
Kubernetes Prow Robot
d25f3757d6 Merge pull request #393 from lixiang233/Ft_custom_pod_phase_PodLifeTime
PodLifeTime: allow custom podStatusPhases
2020-09-11 03:46:14 -07:00
lixiang
1303fe6eb9 PodLifeTime: allow custom podStatusPhases 2020-09-11 09:56:45 +08:00
Kubernetes Prow Robot
1682cc9462 Merge pull request #394 from farah/farah/add-structured-logging
Convert logs to use structured logging
2020-09-09 10:15:08 -07:00
Kubernetes Prow Robot
605927676f Merge pull request #395 from damemi/antiaffinity-evictable
Move IsEvictable check in PodAntiAffinity
2020-09-09 07:45:52 -07:00
Jan Chaloupka
dc41e6a41c Remove createNodePodsMap 2020-09-09 16:29:11 +02:00
Ali Farah
e37c27313e Convert logs to use structured logging 2020-09-10 00:22:24 +10:00
Mike Dame
e5d9756ebe Move IsEvictable check in PodAntiAffinity
While non-evictable pods should never be evicted, they should still be
considered when calculating PodAntiAffinity violations. For example, you
may have an evictable pod that should not be running next to a system-critical
static pod. We currently filter IsEvictable before checking for Affinity violations,
so this case would not be caught.
2020-09-09 09:46:50 -04:00
Kubernetes Prow Robot
e6f1c6f78a Merge pull request #392 from KohlsTechnology/helm-hub
Add Link To Helm Hub
2020-09-03 06:59:41 -07:00
Sean Malloy
fceebded6d Add Link To Helm Hub
Updated the README with the link to the official descheduler helm chart
on https://hub.helm.sh. This makes it easier for end users to install the
desceduler using helm.
2020-09-02 23:55:27 -05:00
Kubernetes Prow Robot
08b2dffa42 Merge pull request #376 from farah/farah/add-structured-logging
Change klog to use structured logging
2020-09-02 09:49:06 -07:00
Kubernetes Prow Robot
745e29959c Merge pull request #388 from damemi/helm-chart-1.19
Update Helm chart to v0.19.0
2020-09-01 09:41:53 -07:00
Kubernetes Prow Robot
aa1bab2c4a Merge pull request #389 from KohlsTechnology/docs-0.19
Update Docs and Manifests for v0.19.0
2020-08-31 12:59:50 -07:00
Sean Malloy
19c3e02b44 Update Docs and Manifests for v0.19.0
* Added v0.19 references to README
* Update k8s manifests with v0.19.0 references
2020-08-31 14:41:53 -05:00
Mike Dame
a45057200f Update Helm chart to v0.19.0 2020-08-31 15:39:26 -04:00
Kubernetes Prow Robot
74d6be3943 Merge pull request #371 from KohlsTechnology/go-1.15
Update To Go 1.15.0
2020-08-31 12:22:43 -07:00
Sean Malloy
1fb3445692 Fix golangci-lint Failures For 1.30.0 Upgrade 2020-08-31 14:03:43 -05:00
Sean Malloy
195082d33b Update To Go 1.15.0
As part of the k8s 1.19 release cycle the Go version is being bumped to
1.15.0. Updating the descheduler to use Go 1.15 prior to the descheduler
v0.19.0 release.

See below issues for reference:
* https://github.com/kubernetes/release/issues/1421
* https://github.com/kubernetes/kubernetes/issues/93484
2020-08-31 14:03:43 -05:00
Kubernetes Prow Robot
03dbc93961 Merge pull request #385 from ingvagabund/low-node-utilization-use-clientset-in-testing
LowNodeUtilization: use clientset in testing, drop all custom reactors
2020-08-31 11:48:44 -07:00
Jan Chaloupka
d27f64480b LowNodeUtilization: use clientset in testing, drop all custom reactors 2020-08-31 20:38:01 +02:00
Kubernetes Prow Robot
5645663b71 Merge pull request #387 from KohlsTechnology/contrib-docs
Add KUBECONFIG Export To Contributing Docs
2020-08-31 06:34:28 -07:00
Kubernetes Prow Robot
dbc8092282 Merge pull request #386 from KohlsTechnology/bump-1.19
Bump k8s Modules For k8s 1.19
2020-08-31 06:34:20 -07:00
Ali Farah
50d2b246d9 Change klog to use structured logging
Signed-off-by: Ali Farah <aliyfarah9@gmail.com>
2020-08-31 14:30:08 +10:00
Sean Malloy
6220aca03e Add KUBECONFIG Export To Contributing Docs 2020-08-28 00:24:29 -05:00
Sean Malloy
674993d23a Update Vendor Deps For k8s 1.19 2020-08-27 23:49:48 -05:00
Sean Malloy
f4c3f9b18f Bump k8s Modules For k8s 1.19 2020-08-27 23:48:58 -05:00
Kubernetes Prow Robot
d65a7c4783 Merge pull request #380 from ingvagabund/move-some-flags-under-descheduler-polic
Deprecate node-selector, max-pods-to-evict-per-node and evict-local-storage-pods flags and promote then to policy v1alpha1 fields
2020-08-21 05:15:39 -07:00
Jan Chaloupka
89541f7545 Deprecate node-selector, max-pods-to-evict-per-node and evict-local-storage-pods flags and promote then to policy v1alpha1 fields 2020-08-21 13:27:40 +02:00
Kubernetes Prow Robot
17f769c1c1 Merge pull request #382 from damemi/readme-toc
Add table of contents to README
2020-08-20 12:36:51 -07:00
Mike Dame
eb9e62f047 Add table of contents to README 2020-08-20 15:24:41 -04:00
Kubernetes Prow Robot
6ccd80f2ee Merge pull request #372 from ingvagabund/isevictable
Redefine IsEvictable to be customizable for a particular strategy
2020-08-19 02:53:11 -07:00
Jan Chaloupka
d8251b9086 Redefine IsEvictable to be customizable for a particular strategy
Use WithXXX methods to extend the list of constraints to also
provide a reasonable error message.
2020-08-19 11:21:26 +02:00
Kubernetes Prow Robot
4cd1f45d90 Merge pull request #375 from KohlsTechnology/helm-chart-update
Update Maintainer Details In Helm Chart
2020-08-17 06:42:19 -07:00
Sean Malloy
2dc3f53a13 Update Maintainer Details In Helm Chart
Changing the maintainer for the helm chart to SIG scheduling instead of
an individual person.
2020-08-13 22:15:18 -05:00
Kubernetes Prow Robot
f5d8a02f79 Merge pull request #374 from ingvagabund/namespace-as-pointer
Promote Namespaces field to a pointer
2020-08-13 19:04:21 -07:00
Jan Chaloupka
a7c51ffae0 Promote Namespaces field to a pointer 2020-08-13 18:38:46 +02:00
Kubernetes Prow Robot
9746fd300f Merge pull request #364 from lixiang233/ft_allow_custom_priority_threshold
Allow custom priority threshold
2020-08-12 07:35:45 -07:00
lixiang
b5e17f91cd ClusterRole: add permission to access priorityclasses.scheduling.k8s.io 2020-08-12 13:55:53 +08:00
lixiang
f5524153ba README: add description for priority threshold 2020-08-12 13:55:53 +08:00
lixiang
6d693d06fb e2e: add testcase for priority threshold 2020-08-12 13:55:53 +08:00
lixiang
4d7a6ee9be e2e: Add priority and priority class to runPodLifetimeStrategy and RcByNameContainer 2020-08-12 13:55:53 +08:00
lixiang
0fdaac6042 Strategy: Set threshold priority from strategy's parameters 2020-08-12 13:54:57 +08:00
Kubernetes Prow Robot
bb7ab369d7 Merge pull request #370 from damemi/1.19-rc.4
Bump k8s dependencies to 1.19-rc.4
2020-08-11 06:42:16 -07:00
Mike Dame
d96cca2221 go mod tidy && go mod vendor 2020-08-11 09:32:00 -04:00
Mike Dame
6ee87d9d7c Bump(k8s.io): to 1.19-rc.4 2020-08-11 09:31:43 -04:00
lixiang
95ce2a4ff7 PodEvictor: add a new param thresholdPriority to IsEvictable 2020-08-11 09:57:26 +08:00
Kubernetes Prow Robot
19e1387bf1 Merge pull request #369 from damemi/duplicates-panic
Add check for ownerref length in DuplicatePods strategy
2020-08-10 07:06:20 -07:00
Mike Dame
ec4c5bed5d Add check for ownerref length in DuplicatePods strategy
This fixes a panic that occurs if a pod has no ownerrefs
later on in the strategy. If a pod has no ownerrefs, there's
nothing for us to do with it.
2020-08-10 09:37:05 -04:00
lixiang
ae38aa63af StrategyParameters: Add new param ThresholdPriority and ThresholdPriorityClassName 2020-08-07 13:53:29 +08:00
Kubernetes Prow Robot
cdcd677aa0 Merge pull request #366 from lixiang233/podAntiAffinity_add_missing_validation
Add missing validation in PodAntiAffinity
2020-07-30 07:34:31 -07:00
lixiang
a5eb9fc36d Add missing validation in PodAntiAffinity 2020-07-30 16:44:03 +08:00
Kubernetes Prow Robot
96efd2312b Merge pull request #363 from KohlsTechnology/new-registry-name
Update Container Registry to k8s.gcr.io
2020-07-29 08:29:48 -07:00
Sean Malloy
e7699c4f6b Update Container Registry to k8s.gcr.io
The k8s project recently cut over to the new official k8s.gcr.io
container registry. The descheduler image can now be pulled from
k8s.gcr.io. This is just a new DNS name for the container registry. The
previously documented DNS names for the registry still work, but
require more typing.
2020-07-28 22:45:06 -05:00
Kubernetes Prow Robot
d0fbebb77c Merge pull request #337 from damemi/rebase-1.19
Rebase k8s dependencies to 1.19-rc.2
2020-07-28 10:47:47 -07:00
Kubernetes Prow Robot
46bb5b6f55 Merge pull request #362 from damemi/only-release-tags
Update version parsing to exclude helm-chart tags
2020-07-28 09:31:48 -07:00
Kubernetes Prow Robot
b799ed074a Merge pull request #361 from jjmengze/patch-1
remove unnecessary line feed  in log messages
2020-07-28 09:19:48 -07:00
Kubernetes Prow Robot
eee41ee111 Merge pull request #338 from ingvagabund/filter-out-pods-by-namespaces
Filter pods by namespaces
2020-07-28 08:57:47 -07:00
MengZeLee
6ac81e0b9c remove unnecessary line feed in log messages
If we remove these \n, there will be no misspell errors in go report
2020-07-28 23:27:23 +08:00
Mike Dame
5970899029 Update version parsing to exclude helm-chart tags 2020-07-28 10:59:27 -04:00
Mike Dame
5bb0389538 Update API scheme registration for 1.19 2020-07-28 10:24:21 -04:00
Mike Dame
92cb6a378a Change gnostic/openapiv2 to lowercase in git 2020-07-27 13:55:31 -04:00
Mike Dame
b09932e92a go mod tidy && go mod vendor 2020-07-27 13:55:29 -04:00
Mike Dame
63603f38d6 Pin k8s deps to 1.19-rc.2 2020-07-27 13:55:13 -04:00
Jan Chaloupka
42db31683f README: describe usage of the namespace filtering 2020-07-27 10:57:30 +02:00
Jan Chaloupka
c40a9c397f e2e: add test for included/excluded namespace through PodLifeTime strategy 2020-07-27 10:54:35 +02:00
Kubernetes Prow Robot
4014ebad92 Merge pull request #359 from KohlsTechnology/more-helm-docs
More Helm Documentation
2020-07-26 18:56:16 -07:00
Kubernetes Prow Robot
bb7cb05571 Merge pull request #360 from dharmab/descheduler-autoheal-guide
Add NPD+CA autohealing use case to user guide
2020-07-24 15:20:16 -07:00
Dharma Bellamkonda
30b2bd5d9f Add NPD+CA autohealing use case to user guide 2020-07-24 15:40:47 -06:00
Sean Malloy
8d5ab05aa0 Add Helm Badge To README 2020-07-23 22:14:22 -05:00
Sean Malloy
db501da34d Clean Up End User Helm Documentation
* Correct helm install command in documentation
* Add link to helm install docs from main README
2020-07-23 22:11:17 -05:00
Kubernetes Prow Robot
8d60370612 Merge pull request #358 from damemi/helm-chart-name-docs
Update helm chart name in docs
2020-07-23 16:38:22 -07:00
Mike Dame
052f011288 Update helm chart name in docs 2020-07-23 15:42:40 -04:00
Kubernetes Prow Robot
6e23579bd0 Merge pull request #356 from damemi/update-helm-action
Update Helm release action to work on release branches
2020-07-23 11:05:29 -07:00
Mike Dame
7331f4e5de Update Helm release action to work on release branches
Basing this action on push to `chart-*` tags doesn't work: the action itself
creates the new release tag, so trying to push the changes to a tag ends up
with the releaser comparing the changes to themself (and failing).

This also renames the chart from "descheduler" to "descheduler-helm-chart", to
avoid confusion with automated releases.
2020-07-22 17:12:54 -04:00
Jan Chaloupka
11f1333af7 ListPodsOnANode: allow to include/exclude namespaces
Info: field selector is still not properly mocked so it's not possible to unit test it
2020-07-21 15:08:05 +02:00
Jan Chaloupka
74f70fdbc9 ListPodsOnANode: define Options type to pass various options
Options like:
- filter
- included/excluded namespaces
- labels
2020-07-21 15:07:45 +02:00
Jan Chaloupka
0006fb039d ListPodsOnANode: have one function parameter per each line 2020-07-21 15:02:34 +02:00
Kubernetes Prow Robot
0894f7740c Merge pull request #343 from ingvagabund/test-cleanup
Clean e2e test so it's easier to extend it
2020-07-21 05:21:13 -07:00
Kubernetes Prow Robot
c3f07dc366 Merge pull request #351 from KohlsTechnology/helm-release-docs
Update Release Documentation For Helm Charts
2020-07-20 20:33:14 -07:00
Sean Malloy
ca8f1051eb Update Helm Chart Version To 0.18.0 2020-07-20 12:52:38 -05:00
Sean Malloy
c7692a2e9f Update Release Documentation For Helm Charts
The descheduler now has an official helm chart. This change documents the
process to release a new helm chart artifact.
2020-07-20 12:51:50 -05:00
Jan Chaloupka
53badf7b61 e2e: create dedicated ns for each e2e test 2020-07-17 18:15:53 +02:00
Jan Chaloupka
f801c5f72f e2e: code cleanup 2020-07-17 18:15:13 +02:00
Jan Chaloupka
327880ba51 e2e: separate tests for LowNodeUtilization strategy and evict annotation 2020-07-17 18:11:01 +02:00
Jan Chaloupka
7bb8b4feda e2e: create and delete RC in the same function body
So create/delete are coupled and caller of create is responsible for calling delete as well
2020-07-17 18:06:19 +02:00
Kubernetes Prow Robot
36b1e1f061 Merge pull request #298 from stevehipwell/helm-chart
Add Helm chart
2020-07-16 16:15:00 -07:00
Steve Hipwell
4507a90bb6 Add helm chart 2020-07-16 17:36:17 +01:00
Kubernetes Prow Robot
550f68306c Merge pull request #342 from KohlsTechnology/remove-travis-ci-config
Remove Travis CI Configuration
2020-07-16 07:05:41 -07:00
Sean Malloy
2b668566ce Remove Travis CI Configuration
The e2e tests are now being run through Prow. Therefore the Travis CI
configuration can now be completely removed.
2020-07-15 23:52:27 -05:00
Kubernetes Prow Robot
ee414ea366 Merge pull request #344 from ingvagabund/kind-have-version-configurable-through-env
e2e: have k8s version configurable when creating cluster through kind
2020-07-14 06:47:21 -07:00
Jan Chaloupka
5761b5d595 e2e: have k8s version configurable when creating cluster through kind 2020-07-14 15:25:02 +02:00
Kubernetes Prow Robot
07f476dfc4 Merge pull request #340 from damemi/update-e2e
Update e2e script to use Kind setup
2020-07-13 19:57:20 -07:00
Mike Dame
f5e9f07321 Move kind setup to e2e script
This moves the kind setup (previously used by Travis) to the e2e runner script
to accomodate the switch to Prow. This provides a KIND_E2E env var to specify
whether to run the tests in kind, or (by default) to run locally).
2020-07-13 15:49:41 -04:00
Kubernetes Prow Robot
05c69ee26a Merge pull request #336 from lixiang233/avoid_duplicated_append_RemoveDuplicatePods
avoid appending list multiple times in RemoveDuplicates
2020-07-09 01:32:02 -07:00
Kubernetes Prow Robot
1623e09122 Merge pull request #332 from lixiang233/fix_test_struct_lownodeutilization
Add maxPodsToEvictPerNode to LowNodeUtilization testcase struct
2020-07-09 01:22:02 -07:00
lixiang
696aa7c505 rename maxPodsToEvict to maxPodsToEvictPerNode to avoid misunderstanding 2020-07-08 15:15:48 +08:00
lixiang
c53dce0805 Add maxPodsToEvictPerNode to testcase struct 2020-07-08 15:15:38 +08:00
lixiang
cd8b5a0354 avoid appending list multiple times in RemoveDuplicates 2020-07-07 14:42:53 +08:00
Kubernetes Prow Robot
b51f24eb8e Merge pull request #333 from lixiang233/refactor_lowNodeUtilization_func
Support only one sorting strategy in lowNodeUtilization
2020-07-02 22:48:47 -07:00
lixiang
ae3b4368ee support only one sorting strategy in lowNodeUtilization 2020-07-01 09:47:35 +08:00
Kubernetes Prow Robot
61eef93618 Merge pull request #330 from paulfantom/patch-1
examples: fix typo
2020-06-29 07:27:24 -07:00
Paweł Krupa
fa0a2ec6fe examples: fix typo
Fix incorrect example causing following error in runtime:

PodsHavingTooManyRestarts thresholds not set
2020-06-28 14:28:41 +02:00
Kubernetes Prow Robot
7ece10a643 Merge pull request #328 from KohlsTechnology/go114
Update To Go 1.14.4
2020-06-25 08:26:39 -07:00
Sean Malloy
71c8eae47e Update To Go 1.14.4
The kubernetes project has been updated to use Go 1.14.4. See below pull
request.

https://github.com/kubernetes/kubernetes/pull/88638

After making the updates to Go 1.14 "make gen" no longer worked. The
file hack/tools.go had to be created to get "make gen" working with Go
1.14.
2020-06-23 21:33:28 -05:00
Kubernetes Prow Robot
267b0837dc Merge pull request #327 from farah/farah/klog-migration
Update klog to v2
2020-06-23 08:35:40 -07:00
Kubernetes Prow Robot
c713537d56 Merge pull request #322 from lixiang233/move_pod_sort
Move sortPodsBasedOnPriority to pod util
2020-06-22 12:21:40 -07:00
Kubernetes Prow Robot
e374229707 Merge pull request #325 from KohlsTechnology/issue-template
Add initial GitHub issue templates
2020-06-22 07:42:40 -07:00
Sean Malloy
f834581a8e Fix Spelling Error 2020-06-22 09:12:22 -05:00
Ali Farah
15fcde5229 Update klog to v2 2020-06-22 20:43:22 +10:00
Sean Malloy
96c5dd3941 Add initial GitHub issue templates
Initially users can open a bug report, feature request, or misc request.
Bug reports and feature requests will have the "kind" label
automatically added.
2020-06-20 00:52:23 -05:00
lixiang
65a03e76bf Add sorting to RemovePodsViolatingInterPodAntiAffinity 2020-06-19 14:11:23 +08:00
lixiang
43525f6493 Move sortPodsBasedOnPriority to podutil 2020-06-16 19:19:03 +08:00
lixiang
7457626f62 Move helper funcs to testutil 2020-06-16 19:17:28 +08:00
Kubernetes Prow Robot
08c22e8921 Merge pull request #321 from KohlsTechnology/add-event-reason
Add Pod Eviction Reason To Events
2020-06-15 08:53:57 -07:00
Sean Malloy
4ff533ec17 Add pod eviction reason to k8s events
Prior to this change the event created for every pod eviction was
identical. Instead leverage the newly added eviction reason when
creating k8s events. This makes it easier for end users to understand
why the descheduler evicted a pod when inspecting k8s events.
2020-06-10 01:13:17 -05:00
Sean Malloy
7680e3d079 Use var declaration instead of short assignmnet
This is a very minor refactor to use a var declaration for the reason
variable. A var declaration is being used because the zero value for
strings is an empty string.

https://golang.org/ref/spec#The_zero_value
2020-06-10 01:05:05 -05:00
Kubernetes Prow Robot
305801dd0e Merge pull request #319 from damemi/remove-extra-eviction-log
Remove redundant eviction log message and add "Reason" param to EvictPod
2020-06-09 11:25:19 -07:00
Mike Dame
9951b85d60 Add optional reason parameter to EvictPod 2020-06-09 13:35:51 -04:00
Mike Dame
ff21ec9432 Remove redundant eviction log message 2020-06-09 12:17:45 -04:00
Kubernetes Prow Robot
5e15d77bf2 Merge pull request #309 from damemi/change-evict-local-storage-pods
Make evictLocalStoragePods a property of PodEvictor
2020-06-08 03:53:46 -07:00
Kubernetes Prow Robot
d833c73fc4 Merge pull request #317 from ingvagabund/extend-owners
Add myself to project reviewers
2020-06-05 10:45:44 -07:00
Kubernetes Prow Robot
795a80dfb0 Merge pull request #310 from lixiang233/ignore_no_evictable_pod_node
Skip evicting when no evictable pod found on node
2020-06-05 07:41:44 -07:00
Mike Dame
f5e4acdd8a Make evictLocalStoragePods a property of PodEvictor
This is a minor refactor of PodEvictor to include evictLocalStoragePods as a
property (so that it doesn't need to be passed to each strategy function.) It
also removes ListEvictablePodsOnNode and extends ListPodsOnANode with an optional
"filter" function parameter, so that for example IsEvictable can be passed to it
and achieve the same results as the function formerly known as ListEvictablePodsOnNode.
This approach also now allows strategies to more explicitly extend their criteria of what
IsEvictable considers an evictable pod (such as NodeAffinity, which checks that the pod
can fit on any other node).
2020-06-05 10:28:48 -04:00
Jan Chaloupka
eb4c1bb355 Add myself to project reviewers
Based on https://github.com/kubernetes/community/blob/master/community-membership.md#requirements-1:

The following apply to the part of codebase for which one would be a reviewer in an OWNERS file (for repos using the bot).

> member for at least 3 months

For a couple of years now

> Primary reviewer for at least 5 PRs to the codebase

https://github.com/kubernetes-sigs/descheduler/pull/285
https://github.com/kubernetes-sigs/descheduler/pull/275
https://github.com/kubernetes-sigs/descheduler/pull/267
https://github.com/kubernetes-sigs/descheduler/pull/254
https://github.com/kubernetes-sigs/descheduler/pull/181

> Reviewed or merged at least 20 substantial PRs to the codebase

https://github.com/kubernetes-sigs/descheduler/pulls?q=is%3Apr+is%3Aclosed+assignee%3Aingvagabund

> Knowledgeable about the codebase

yes

> Sponsored by a subproject approver
> With no objections from other approvers
> Done through PR to update the OWNERS file

this PR

> May either self-nominate, be nominated by an approver in this subproject, or be nominated by a robot

self-nominating
2020-06-04 18:16:38 +02:00
Kubernetes Prow Robot
6dfa95cc87 Merge pull request #312 from lixiang233/fix_lowUtilization_golint
Pass golint check for pkg/descheduler/strategies/
2020-06-04 07:09:15 -07:00
lixiang
eb9d974a8b pass golint check for pkg/descheduler/strategies/ 2020-06-04 14:33:24 +08:00
lixiang
d41a1f4a56 Ignore evicting when no evictable pod found on node 2020-06-04 10:31:15 +08:00
Kubernetes Prow Robot
138ad556a3 Merge pull request #315 from damemi/update-travis-yml
Remove redundant tests from Travis CI
2020-06-03 11:44:11 -07:00
Mike Dame
37124e6e45 Remove redundant tests from Travis CI
These tests are now covered by k8s test-infra prow tests.
2020-06-03 14:11:08 -04:00
Kubernetes Prow Robot
bd412bf87f Merge pull request #300 from damemi/refactor-is-evictable
Add more verbose logging to IsEvictable checks
2020-05-29 10:03:16 -07:00
Kubernetes Prow Robot
6f9b31f568 Merge pull request #308 from damemi/fix-lint-for-ci
Download golangci-lint to _output/bin directory
2020-05-29 09:57:15 -07:00
Mike Dame
c858740c4f Run golangci-lint from GOPATH 2020-05-29 12:48:42 -04:00
Kubernetes Prow Robot
bfefe634a1 Merge pull request #307 from damemi/make-verify
Add parent make verify target
2020-05-29 08:01:16 -07:00
Mike Dame
7b7b9e1cd7 Add parent make verify target 2020-05-29 10:23:38 -04:00
Mike Dame
0a4b8b0a25 Add more verbose logging to IsEvictable checks 2020-05-29 10:20:14 -04:00
Kubernetes Prow Robot
f28183dcbe Merge pull request #306 from ingvagabund/make-Params-a-pointer-II
Add missing address of api.StrategyParameters
2020-05-29 05:53:16 -07:00
Jan Chaloupka
abdf79454f Add missing address of api.StrategyParameters
https://github.com/kubernetes-sigs/descheduler/pull/285 got merged before re-running the unit tests.
2020-05-29 13:34:57 +02:00
Kubernetes Prow Robot
46b570b71d Merge pull request #296 from ingvagabund/make-Params-a-pointer
Make DeschedulerStrategy.Params a pointer
2020-05-28 22:53:15 -07:00
Kubernetes Prow Robot
616a9b5f6b Merge pull request #285 from lixiang233/Ft_change_lowUtilization_break
Change break condition and thresholds validation for lowUtilization
2020-05-28 07:36:02 -07:00
Kubernetes Prow Robot
003a4cdc2b Merge pull request #302 from lixiang233/clean_up_strategy_enable_check
Clean up code in LowNodeUtilization
2020-05-28 07:16:02 -07:00
Kubernetes Prow Robot
54ea05d8bb Merge pull request #299 from damemi/node-affinity-logs
Standardize node affinity strategy logs
2020-05-28 01:52:02 -07:00
lixiang
d0eea0cabb Clean up code in LowNodeUtilization 2020-05-28 10:35:46 +08:00
Mike Dame
f0297dfe03 Standardize node affinity strategy logs 2020-05-27 16:52:16 -04:00
Kubernetes Prow Robot
ef1f36f8e4 Merge pull request #297 from ingvagabund/add-verify-gofmt
Add verify-gofmt make target
2020-05-27 07:02:40 -07:00
Jan Chaloupka
a733c95dcc Add verify-gofmt and verify-vendor make target
So e.g. CI can run 'make verify-gofmt' instead of ./hack/verify-gofmt in case the script location changes.
2020-05-27 15:08:20 +02:00
Jan Chaloupka
5a81a0661b Make DeschedulerStrategy.Params a pointer
To avoid empty params fields when serializing:
  RemoveDuplicates:
    enabled: true
    params: {}
  RemovePodsHavingTooManyRestarts:
    enabled: true
    params:
      podsHavingTooManyRestarts: {}
  RemovePodsViolatingInterPodAntiAffinity:
    enabled: true
    params: {}
  RemovePodsViolatingNodeAffinity:
    enabled: true
    params:
      nodeAffinityType:
      - requiredDuringSchedulingIgnoredDuringExecution
  RemovePodsViolatingNodeTaints:
    enabled: true
    params: {}
2020-05-27 10:31:31 +02:00
Kubernetes Prow Robot
83e04960af Merge pull request #288 from KohlsTechnology/update-support-matrix
Update Compatibility Matrix in README
2020-05-26 06:37:12 -07:00
lixiang
2a8dc69cbb Stop condition and config validation change for lowUtilization
1.Set default CPU/Mem/Pods percentage of thresholds to 100
2.Stop evicting pods if any resource ran out
3.Add thresholds verification method and limit resource percentage within [0, 100]
4.Change testcases and readme
2020-05-25 19:03:37 +08:00
Kubernetes Prow Robot
5d82d08af3 Merge pull request #291 from KohlsTechnology/update-releasedocs
Add URL For Viewing Staging Registry
2020-05-22 09:55:11 -07:00
Sean Malloy
c265825166 Add URL For Viewing Staging Registry
This newly documented URL can be used to view the descheduler staging
registry in a web browser. This is easier to browse if the gcloud
command is not available.
2020-05-22 11:33:45 -05:00
Kubernetes Prow Robot
ed0126fb63 Merge pull request #290 from KohlsTechnology/bump-install-manifests
Update Install Manifests For Release v0.18.0
2020-05-22 07:08:38 -07:00
Sean Malloy
8c7267b379 Update Install Manifests For Release v0.18.0 2020-05-21 22:54:54 -05:00
Sean Malloy
435674fb44 Update Compatibility Matrix in README
The matrix has been updated with the soon the be released v0.18
details. Also, clarified the descheduler and k8s version compatibility
requirements and recommendations.
2020-05-20 10:55:57 -05:00
4301 changed files with 701033 additions and 155961 deletions

46
.github/ISSUE_TEMPLATE/bug_report.md vendored Normal file
View File

@@ -0,0 +1,46 @@
---
name: Bug report
about: Create a bug report to help improve descheduler
title: ''
labels: 'kind/bug'
assignees: ''
---
<!-- Please answer these questions before submitting your bug report. Thanks! -->
**What version of descheduler are you using?**
descheduler version:
**Does this issue reproduce with the latest release?**
**Which descheduler CLI options are you using?**
**Please provide a copy of your descheduler policy config file**
**What k8s version are you using (`kubectl version`)?**
<details><summary><code>kubectl version</code> Output</summary><br><pre>
$ kubectl version
</pre></details>
**What did you do?**
<!--
If possible, provide a recipe for reproducing the error.
A detailed sequence of steps describing what to do to observe the issue is good.
A complete runnable bash shell script is best.
-->
**What did you expect to see?**
**What did you see instead?**

View File

@@ -0,0 +1,26 @@
---
name: Feature request
about: Suggest an idea for descheduler
title: ''
labels: 'kind/feature'
assignees: ''
---
<!-- Please answer these questions before submitting your feature request. Thanks! -->
**Is your feature request related to a problem? Please describe.**
<!-- A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
**Describe the solution you'd like**
<!-- A clear and concise description of what you want to happen. -->
**Describe alternatives you've considered**
<!-- A clear and concise description of any alternative solutions or features you've considered. -->
**What version of descheduler are you using?**
descheduler version:
**Additional context**
<!-- Add any other context or screenshots about the feature request here. -->

18
.github/ISSUE_TEMPLATE/misc_request.md vendored Normal file
View File

@@ -0,0 +1,18 @@
---
name: Miscellaneous
about: Not a bug and not a feature
title: ''
labels: ''
assignees: ''
---
<!--
Please do not use this to submit a bug report or feature request. Use the
bug report or feature request options instead.
Also, please consider posting in the Kubernetes Slack #sig-scheduling channel
instead of opening an issue if this is a support request.
Thanks!
-->

31
.github/workflows/release.yaml vendored Normal file
View File

@@ -0,0 +1,31 @@
name: Release Charts
on:
push:
branches:
- release-*
jobs:
release:
runs-on: ubuntu-latest
steps:
- name: Checkout
uses: actions/checkout@v2
with:
fetch-depth: 0
- name: Configure Git
run: |
git config user.name "$GITHUB_ACTOR"
git config user.email "$GITHUB_ACTOR@users.noreply.github.com"
- name: Install Helm
uses: azure/setup-helm@v1
with:
version: v3.4.0
- name: Run chart-releaser
uses: helm/chart-releaser-action@v1.1.0
env:
CR_TOKEN: "${{ secrets.GITHUB_TOKEN }}"
CR_RELEASE_NAME_TEMPLATE: "descheduler-helm-chart-{{ .Version }}"

2
.gitignore vendored
View File

@@ -3,3 +3,5 @@ _tmp/
vendordiff.patch
.idea/
*.code-workspace
.vscode/
kind

View File

@@ -1,33 +0,0 @@
sudo: false
language: go
go:
- 1.13.x
env:
- K8S_VERSION=v1.18.2
- K8S_VERSION=v1.17.5
- K8S_VERSION=v1.16.9
- K8S_VERSION=v1.15.11
services:
- docker
before_script:
- curl -Lo kubectl https://storage.googleapis.com/kubernetes-release/release/${K8S_VERSION}/bin/linux/amd64/kubectl && chmod +x kubectl && sudo mv kubectl /usr/local/bin/
- wget https://github.com/kubernetes-sigs/kind/releases/download/v0.8.1/kind-linux-amd64
- chmod +x kind-linux-amd64
- mv kind-linux-amd64 kind
- export PATH=$PATH:$PWD
- kind create cluster --image kindest/node:${K8S_VERSION} --config=$TRAVIS_BUILD_DIR/hack/kind_config.yaml
- export KUBECONFIG="$(kind get kubeconfig-path)"
- docker pull kubernetes/pause
- kind load docker-image kubernetes/pause
- kind get kubeconfig > /tmp/admin.conf
script:
- mkdir -p ~/gopath/src/sigs.k8s.io/
- mv ~/gopath/src/github.com/kubernetes-sigs/descheduler ~/gopath/src/sigs.k8s.io/.
- hack/verify-gofmt.sh
- hack/verify-vendor.sh
- make lint
- make build
- make test-unit
- make test-e2e

View File

@@ -11,15 +11,19 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
FROM golang:1.13.9
FROM golang:1.16.14
WORKDIR /go/src/sigs.k8s.io/descheduler
COPY . .
RUN make
ARG ARCH
ARG VERSION
RUN VERSION=${VERSION} make build.$ARCH
FROM scratch
MAINTAINER Avesh Agarwal <avesh.ncsu@gmail.com>
MAINTAINER Kubernetes SIG Scheduling <kubernetes-sig-scheduling@googlegroups.com>
USER 1000
COPY --from=0 /go/src/sigs.k8s.io/descheduler/_output/bin/descheduler /bin/descheduler

View File

@@ -13,7 +13,9 @@
# limitations under the License.
FROM scratch
MAINTAINER Avesh Agarwal <avesh.ncsu@gmail.com>
MAINTAINER Kubernetes SIG Scheduling <kubernetes-sig-scheduling@googlegroups.com>
USER 1000
COPY _output/bin/descheduler /bin/descheduler

View File

@@ -14,16 +14,18 @@
.PHONY: test
# VERSION is currently based on the last commit
VERSION?=$(shell git describe --tags)
COMMIT=$(shell git rev-parse HEAD)
# VERSION is based on a date stamp plus the last commit
VERSION?=v$(shell date +%Y%m%d)-$(shell git describe --tags --match "v*")
BRANCH?=$(shell git branch --show-current)
SHA1?=$(shell git rev-parse HEAD)
BUILD=$(shell date +%FT%T%z)
LDFLAG_LOCATION=sigs.k8s.io/descheduler/cmd/descheduler/app
LDFLAG_LOCATION=sigs.k8s.io/descheduler/pkg/version
ARCHS = amd64 arm arm64
LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitCommit=${COMMIT}"
LDFLAGS=-ldflags "-X ${LDFLAG_LOCATION}.version=${VERSION} -X ${LDFLAG_LOCATION}.buildDate=${BUILD} -X ${LDFLAG_LOCATION}.gitbranch=${BRANCH} -X ${LDFLAG_LOCATION}.gitsha1=${SHA1}"
GOLANGCI_VERSION := v1.15.0
HAS_GOLANGCI := $(shell which golangci-lint)
GOLANGCI_VERSION := v1.30.0
HAS_GOLANGCI := $(shell ls _output/bin/golangci-lint 2> /dev/null)
# REGISTRY is the container registry to push
# into. The default is to push to the staging
@@ -41,26 +43,74 @@ IMAGE_GCLOUD:=$(REGISTRY)/descheduler:$(VERSION)
# In the future binaries can be uploaded to
# GCS bucket gs://k8s-staging-descheduler.
HAS_HELM := $(shell which helm 2> /dev/null)
all: build
build:
CGO_ENABLED=0 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.amd64:
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.arm:
CGO_ENABLED=0 GOOS=linux GOARCH=arm GOARM=7 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
build.arm64:
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build ${LDFLAGS} -o _output/bin/descheduler sigs.k8s.io/descheduler/cmd/descheduler
dev-image: build
docker build -f Dockerfile.dev -t $(IMAGE) .
image:
docker build -t $(IMAGE) .
docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="amd64" -t $(IMAGE) .
push-container-to-gcloud: image
image.amd64:
docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="amd64" -t $(IMAGE)-amd64 .
image.arm:
docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="arm" -t $(IMAGE)-arm .
image.arm64:
docker build --build-arg VERSION="$(VERSION)" --build-arg ARCH="arm64" -t $(IMAGE)-arm64 .
push: image
gcloud auth configure-docker
docker tag $(IMAGE) $(IMAGE_GCLOUD)
docker push $(IMAGE_GCLOUD)
push: push-container-to-gcloud
push-all: image.amd64 image.arm image.arm64
gcloud auth configure-docker
for arch in $(ARCHS); do \
docker tag $(IMAGE)-$${arch} $(IMAGE_GCLOUD)-$${arch} ;\
docker push $(IMAGE_GCLOUD)-$${arch} ;\
done
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest create $(IMAGE_GCLOUD) $(addprefix --amend $(IMAGE_GCLOUD)-, $(ARCHS))
for arch in $(ARCHS); do \
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest annotate --arch $${arch} $(IMAGE_GCLOUD) $(IMAGE_GCLOUD)-$${arch} ;\
done
DOCKER_CLI_EXPERIMENTAL=enabled docker manifest push $(IMAGE_GCLOUD) ;\
clean:
rm -rf _output
rm -rf _tmp
verify: verify-govet verify-spelling verify-gofmt verify-vendor lint lint-chart verify-toc verify-gen
verify-govet:
./hack/verify-govet.sh
verify-spelling:
./hack/verify-spelling.sh
verify-gofmt:
./hack/verify-gofmt.sh
verify-vendor:
./hack/verify-vendor.sh
verify-toc:
./hack/verify-toc.sh
test-unit:
./test/run-unit-tests.sh
@@ -72,10 +122,26 @@ gen:
./hack/update-generated-conversions.sh
./hack/update-generated-deep-copies.sh
./hack/update-generated-defaulters.sh
#undo go mod changes caused by above.
go mod tidy
./hack/update-toc.sh
verify-gen:
./hack/verify-conversions.sh
./hack/verify-deep-copies.sh
./hack/verify-defaulters.sh
lint:
ifndef HAS_GOLANGCI
curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b $(GOPATH)/bin ${GOLANGCI_VERSION}
curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | sh -s -- -b ./_output/bin ${GOLANGCI_VERSION}
endif
golangci-lint run
./_output/bin/golangci-lint run
lint-chart: ensure-helm-install
helm lint ./charts/descheduler
test-helm: ensure-helm-install
./test/run-helm-tests.sh
ensure-helm-install:
ifndef HAS_HELM
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 && chmod 700 ./get_helm.sh && ./get_helm.sh
endif

11
OWNERS
View File

@@ -1,11 +1,16 @@
approvers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla
- damemi
- ingvagabund
- seanmalloy
reviewers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla
- damemi
- seanmalloy
- ingvagabund
- lixiang233
emeritus_approvers:
- aveshagarwal
- k82cn
- ravisantoshgudimetla

637
README.md
View File

@@ -1,10 +1,8 @@
[![Build Status](https://travis-ci.org/kubernetes-sigs/descheduler.svg?branch=master)](https://travis-ci.org/kubernetes-sigs/descheduler)
[![Go Report Card](https://goreportcard.com/badge/kubernetes-sigs/descheduler)](https://goreportcard.com/report/sigs.k8s.io/descheduler)
[![Go Report Card](https://goreportcard.com/badge/sigs.k8s.io/descheduler)](https://goreportcard.com/report/sigs.k8s.io/descheduler)
![Release Charts](https://github.com/kubernetes-sigs/descheduler/workflows/Release%20Charts/badge.svg)
# Descheduler for Kubernetes
## Introduction
Scheduling in Kubernetes is the process of binding pending pods to nodes, and is performed by
a component of Kubernetes called kube-scheduler. The scheduler's decisions, whether or where a
pod can or can not be scheduled, are guided by its configurable policy which comprises of set of
@@ -24,9 +22,45 @@ Descheduler, based on its policy, finds pods that can be moved and evicts them.
note, in current implementation, descheduler does not schedule replacement of evicted pods
but relies on the default scheduler for that.
Table of Contents
=================
<!-- toc -->
- [Quick Start](#quick-start)
- [Run As A Job](#run-as-a-job)
- [Run As A CronJob](#run-as-a-cronjob)
- [Run As A Deployment](#run-as-a-deployment)
- [Install Using Helm](#install-using-helm)
- [Install Using Kustomize](#install-using-kustomize)
- [User Guide](#user-guide)
- [Policy and Strategies](#policy-and-strategies)
- [RemoveDuplicates](#removeduplicates)
- [LowNodeUtilization](#lownodeutilization)
- [HighNodeUtilization](#highnodeutilization)
- [RemovePodsViolatingInterPodAntiAffinity](#removepodsviolatinginterpodantiaffinity)
- [RemovePodsViolatingNodeAffinity](#removepodsviolatingnodeaffinity)
- [RemovePodsViolatingNodeTaints](#removepodsviolatingnodetaints)
- [RemovePodsViolatingTopologySpreadConstraint](#removepodsviolatingtopologyspreadconstraint)
- [RemovePodsHavingTooManyRestarts](#removepodshavingtoomanyrestarts)
- [PodLifeTime](#podlifetime)
- [RemoveFailedPods](#removefailedpods)
- [Filter Pods](#filter-pods)
- [Namespace filtering](#namespace-filtering)
- [Priority filtering](#priority-filtering)
- [Label filtering](#label-filtering)
- [Node Fit filtering](#node-fit-filtering)
- [Pod Evictions](#pod-evictions)
- [Pod Disruption Budget (PDB)](#pod-disruption-budget-pdb)
- [Metrics](#metrics)
- [Compatibility Matrix](#compatibility-matrix)
- [Getting Involved and Contributing](#getting-involved-and-contributing)
- [Communicating With Contributors](#communicating-with-contributors)
- [Roadmap](#roadmap)
- [Code of conduct](#code-of-conduct)
<!-- /toc -->
## Quick Start
The descheduler can be run as a Job or CronJob inside of a k8s cluster. It has the
The descheduler can be run as a `Job`, `CronJob`, or `Deployment` inside of a k8s cluster. It has the
advantage of being able to be run multiple times without needing user intervention.
The descheduler pod is run as a critical pod in the `kube-system` namespace to avoid
being evicted by itself or by the kubelet.
@@ -34,17 +68,52 @@ being evicted by itself or by the kubelet.
### Run As A Job
```
kubectl create -f kubernetes/rbac.yaml
kubectl create -f kubernetes/configmap.yaml
kubectl create -f kubernetes/job.yaml
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/job/job.yaml
```
### Run As A CronJob
```
kubectl create -f kubernetes/rbac.yaml
kubectl create -f kubernetes/configmap.yaml
kubectl create -f kubernetes/cronjob.yaml
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/cronjob/cronjob.yaml
```
### Run As A Deployment
```
kubectl create -f kubernetes/base/rbac.yaml
kubectl create -f kubernetes/base/configmap.yaml
kubectl create -f kubernetes/deployment/deployment.yaml
```
### Install Using Helm
Starting with release v0.18.0 there is an official helm chart that can be used to install the
descheduler. See the [helm chart README](https://github.com/kubernetes-sigs/descheduler/blob/master/charts/descheduler/README.md) for detailed instructions.
The descheduler helm chart is also listed on the [artifact hub](https://artifacthub.io/packages/helm/descheduler/descheduler).
### Install Using Kustomize
You can use kustomize to install descheduler.
See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/kustomize/resource/) for detailed instructions.
Run As A Job
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.22.1' | kubectl apply -f -
```
Run As A CronJob
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.22.1' | kubectl apply -f -
```
Run As A Deployment
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.22.1' | kubectl apply -f -
```
## User Guide
@@ -53,25 +122,65 @@ See the [user guide](docs/user-guide.md) in the `/docs` directory.
## Policy and Strategies
Descheduler's policy is configurable and includes strategies that can be enabled or disabled.
Seven strategies `RemoveDuplicates`, `LowNodeUtilization`, `RemovePodsViolatingInterPodAntiAffinity`,
`RemovePodsViolatingNodeAffinity`, `RemovePodsViolatingNodeTaints`, `RemovePodsHavingTooManyRestarts`, and `PodLifeTime`
are currently implemented. As part of the policy, the parameters associated with the strategies can be configured too.
By default, all strategies are enabled.
Descheduler's policy is configurable and includes strategies that can be enabled or disabled. By default, all strategies are enabled.
The policy includes a common configuration that applies to all the strategies:
| Name | Default Value | Description |
|------|---------------|-------------|
| `nodeSelector` | `nil` | limiting the nodes which are processed |
| `evictLocalStoragePods` | `false` | allows eviction of pods with local storage |
| `evictSystemCriticalPods` | `false` | [Warning: Will evict Kubernetes system pods] allows eviction of pods with any priority, including system pods like kube-dns |
| `ignorePvcPods` | `false` | set whether PVC pods should be evicted or ignored |
| `maxNoOfPodsToEvictPerNode` | `nil` | maximum number of pods evicted from each node (summed through all strategies) |
As part of the policy, the parameters associated with each strategy can be configured.
See each strategy for details on available parameters.
**Policy:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
nodeSelector: prod=dev
evictLocalStoragePods: true
evictSystemCriticalPods: true
maxNoOfPodsToEvictPerNode: 40
ignorePvcPods: false
strategies:
...
```
The following diagram provides a visualization of most of the strategies to help
categorize how strategies fit together.
![Strategies diagram](strategies_diagram.png)
### RemoveDuplicates
This strategy makes sure that there is only one pod associated with a Replica Set (RS),
Replication Controller (RC), Deployment, or Job running on the same node. If there are more,
This strategy makes sure that there is only one pod associated with a ReplicaSet (RS),
ReplicationController (RC), StatefulSet, or Job running on the same node. If there are more,
those duplicate pods are evicted for better spreading of pods in a cluster. This issue could happen
if some nodes went down due to whatever reasons, and pods on them were moved to other nodes leading to
more than one pod associated with a RS or RC, for example, running on the same node. Once the failed nodes
are ready again, this strategy could be enabled to evict those duplicate pods.
It provides one optional parameter, `ExcludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
It provides one optional parameter, `excludeOwnerKinds`, which is a list of OwnerRef `Kind`s. If a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction. Note that
pods created by Deployments are considered for eviction by this strategy. The `excludeOwnerKinds` parameter
should include `ReplicaSet` to have pods created by Deployments excluded.
```
**Parameters:**
|Name|Type|
|---|---|
|`excludeOwnerKinds`|list(string)|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -90,19 +199,37 @@ in the hope that recreation of evicted pods will be scheduled on these underutil
parameters of this strategy are configured under `nodeResourceUtilizationThresholds`.
The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, and number of pods in terms of percentage. If a node's
usage is below threshold for all (cpu, memory, and number of pods), the node is considered underutilized.
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage (the percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node).
If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
Currently, pods request resource requirements are considered for computing node resource utilization.
There is another configurable threshold, `targetThresholds`, that is used to compute those potential nodes
from where pods could be evicted. Any node, between the thresholds, `thresholds` and `targetThresholds` is
from where pods could be evicted. If a node's usage is above targetThreshold for any (cpu, memory, number of pods, or extended resources),
the node is considered over utilized. Any node between the thresholds, `thresholds` and `targetThresholds` is
considered appropriately utilized and is not considered for eviction. The threshold, `targetThresholds`,
can be configured for cpu, memory, and number of pods too in terms of percentage.
These thresholds, `thresholds` and `targetThresholds`, could be tuned as per your cluster requirements.
Here is an example of a policy for this strategy:
These thresholds, `thresholds` and `targetThresholds`, could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `overutilized nodes` (those with usage above `targetThresholds`) to `underutilized nodes`
(those with usage below `thresholds`), it will abort if any number of `underutilized nodes` or `overutilized nodes` is zero.
```
**Parameters:**
|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`targetThresholds`|map(string:int)|
|`numberOfNodes`|int|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -120,26 +247,105 @@ strategies:
"pods": 50
```
Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`.
If any of these resource types is not specified, all its thresholds default to 100% to avoid nodes going from underutilized to overutilized.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional,
and will not be used to compute node's usage if it's not specified in `thresholds` and `targetThresholds` explicitly.
* `thresholds` or `targetThresholds` can not be nil and they must configure exactly the same types of resources.
* The valid range of the resource's percentage value is \[0, 100\]
* Percentage value of `thresholds` can not be greater than `targetThresholds` for the same resource.
There is another parameter associated with the `LowNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
are above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
### HighNodeUtilization
This strategy finds nodes that are under utilized and evicts pods from the nodes in the hope that these pods will be
scheduled compactly into fewer nodes. Used in conjunction with node auto-scaling, this strategy is intended to help
trigger down scaling of under utilized nodes.
This strategy **must** be used with the scheduler strategy `MostRequestedPriority`. The parameters of this strategy are
configured under `nodeResourceUtilizationThresholds`.
The under utilization of nodes is determined by a configurable threshold `thresholds`. The threshold
`thresholds` can be configured for cpu, memory, number of pods, and extended resources in terms of percentage. The percentage is
calculated as the current resources requested on the node vs [total allocatable](https://kubernetes.io/docs/concepts/architecture/nodes/#capacity).
For pods, this means the number of pods on the node as a fraction of the pod capacity set for that node.
If a node's usage is below threshold for all (cpu, memory, number of pods and extended resources), the node is considered underutilized.
Currently, pods request resource requirements are considered for computing node resource utilization.
Any node above `thresholds` is considered appropriately utilized and is not considered for eviction.
The `thresholds` param could be tuned as per your cluster requirements. Note that this
strategy evicts pods from `underutilized nodes` (those with usage below `thresholds`)
so that they can be recreated in appropriately utilized nodes.
The strategy will abort if any number of `underutilized nodes` or `appropriately utilized nodes` is zero.
**Parameters:**
|Name|Type|
|---|---|
|`thresholds`|map(string:int)|
|`numberOfNodes`|int|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"cpu" : 20
"memory": 20
"pods": 20
```
Policy should pass the following validation checks:
* Three basic native types of resources are supported: `cpu`, `memory` and `pods`. If any of these resource types is not specified, all its thresholds default to 100%.
* Extended resources are supported. For example, resource type `nvidia.com/gpu` is specified for GPU node utilization. Extended resources are optional, and will not be used to compute node's usage if it's not specified in `thresholds` explicitly.
* `thresholds` can not be nil.
* The valid range of the resource's percentage value is \[0, 100\]
There is another parameter associated with the `HighNodeUtilization` strategy, called `numberOfNodes`.
This parameter can be configured to activate the strategy only when the number of under utilized nodes
is above the configured value. This could be helpful in large clusters where a few nodes could go
under utilized frequently or for a short period of time. By default, `numberOfNodes` is set to zero.
### RemovePodsViolatingInterPodAntiAffinity
This strategy makes sure that pods violating interpod anti-affinity are removed from nodes. For example,
if there is podA on a node and podB and podC (running on the same node) have anti-affinity rules which prohibit
them to run on the same node, then podA will be evicted from the node so that podB and podC could run. This
issue could happen, when the anti-affinity rules for podB and podC are created when they are already running on
node. Currently, there are no parameters associated with this strategy. To disable this strategy, the
policy should look like:
node.
```
**Parameters:**
|Name|Type|
|---|---|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingInterPodAntiAffinity":
enabled: false
enabled: true
```
### RemovePodsViolatingNodeAffinity
@@ -160,9 +366,20 @@ of scheduling. Over time nodeA stops to satisfy the rule. When the strategy gets
executed and there is another node available that satisfies the node affinity rule,
podA gets evicted from nodeA.
The policy file should look like:
**Parameters:**
```
|Name|Type|
|---|---|
|`nodeAffinityType`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -178,9 +395,21 @@ strategies:
This strategy makes sure that pods violating NoSchedule taints on nodes are removed. For example there is a
pod "podA" with a toleration to tolerate a taint ``key=value:NoSchedule`` scheduled and running on the tainted
node. If the node's taint is subsequently updated/removed, taint is no longer satisfied by its pods' tolerations
and will be evicted. The policy file should look like:
and will be evicted.
````
**Parameters:**
|Name|Type|
|---|---|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
````yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -188,11 +417,63 @@ strategies:
enabled: true
````
### RemovePodsViolatingTopologySpreadConstraint
This strategy makes sure that pods violating [topology spread constraints](https://kubernetes.io/docs/concepts/workloads/pods/pod-topology-spread-constraints/)
are evicted from nodes. Specifically, it tries to evict the minimum number of pods required to balance topology domains to within each constraint's `maxSkew`.
This strategy requires k8s version 1.18 at a minimum.
By default, this strategy only deals with hard constraints, setting parameter `includeSoftConstraints` to `true` will
include soft constraints.
Strategy parameter `labelSelector` is not utilized when balancing topology domains and is only applied during eviction to determine if the pod can be evicted.
**Parameters:**
|Name|Type|
|---|---|
|`includeSoftConstraints`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
includeSoftConstraints: false
```
### RemovePodsHavingTooManyRestarts
This strategy makes sure that pods having too many restarts are removed from nodes. For example a pod with EBS/PD that can't get the volume/disk attached to the instance, then the pod should be re-scheduled to other nodes.
This strategy makes sure that pods having too many restarts are removed from nodes. For example a pod with EBS/PD that
can't get the volume/disk attached to the instance, then the pod should be re-scheduled to other nodes. Its parameters
include `podRestartThreshold`, which is the number of restarts at which a pod should be evicted, and `includingInitContainers`,
which determines whether init container restarts should be factored into that calculation.
|`labelSelector`|(see [label filtering](#label-filtering))|
```
**Parameters:**
|Name|Type|
|---|---|
|`podRestartThreshold`|int|
|`includingInitContainers`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
@@ -206,42 +487,302 @@ strategies:
### PodLifeTime
This strategy evicts pods that are older than `.strategies.PodLifeTime.params.maxPodLifeTimeSeconds` The policy
file should look like:
This strategy evicts pods that are older than `maxPodLifeTimeSeconds`.
````
You can also specify `podStatusPhases` to `only` evict pods with specific `StatusPhases`, currently this parameter is limited
to `Running` and `Pending`.
**Parameters:**
|Name|Type|
|---|---|
|`maxPodLifeTimeSeconds`|int|
|`podStatusPhases`|list(string)|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
podStatusPhases:
- "Pending"
```
### RemoveFailedPods
This strategy evicts pods that are in failed status phase.
You can provide an optional parameter to filter by failed `reasons`.
`reasons` can be expanded to include reasons of InitContainers as well by setting the optional parameter `includingInitContainers` to `true`.
You can specify an optional parameter `minPodLifeTimeSeconds` to evict pods that are older than specified seconds.
Lastly, you can specify the optional parameter `excludeOwnerKinds` and if a pod
has any of these `Kind`s listed as an `OwnerRef`, that pod will not be considered for eviction.
**Parameters:**
|Name|Type|
|---|---|
|`minPodLifeTimeSeconds`|uint|
|`excludeOwnerKinds`|list(string)|
|`reasons`|list(string)|
|`includingInitContainers`|bool|
|`thresholdPriority`|int (see [priority filtering](#priority-filtering))|
|`thresholdPriorityClassName`|string (see [priority filtering](#priority-filtering))|
|`namespaces`|(see [namespace filtering](#namespace-filtering))|
|`labelSelector`|(see [label filtering](#label-filtering))|
|`nodeFit`|bool (see [node fit filtering](#node-fit-filtering))|
**Example:**
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemoveFailedPods":
enabled: true
params:
failedPods:
reasons:
- "NodeAffinity"
includingInitContainers: true
excludeOwnerKinds:
- "Job"
minPodLifeTimeSeconds: 3600
```
## Filter Pods
### Namespace filtering
The following strategies accept a `namespaces` parameter which allows to specify a list of including, resp. excluding namespaces:
* `PodLifeTime`
* `RemovePodsHavingTooManyRestarts`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemoveDuplicates`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemoveFailedPods`
For example:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
namespaces:
include:
- "namespace1"
- "namespace2"
```
In the examples `PodLifeTime` gets executed only over `namespace1` and `namespace2`.
The similar holds for `exclude` field:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
namespaces:
exclude:
- "namespace1"
- "namespace2"
```
The strategy gets executed over all namespaces but `namespace1` and `namespace2`.
It's not allowed to compute `include` with `exclude` field.
### Priority filtering
All strategies are able to configure a priority threshold, only pods under the threshold can be evicted. You can
specify this threshold by setting `thresholdPriorityClassName`(setting the threshold to the value of the given
priority class) or `thresholdPriority`(directly setting the threshold) parameters. By default, this threshold
is set to the value of `system-cluster-critical` priority class.
Note: Setting `evictSystemCriticalPods` to true disables priority filtering entirely.
E.g.
Setting `thresholdPriority`
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
thresholdPriority: 10000
```
Setting `thresholdPriorityClassName`
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
thresholdPriorityClassName: "priorityclass1"
```
Note that you can't configure both `thresholdPriority` and `thresholdPriorityClassName`, if the given priority class
does not exist, descheduler won't create it and will throw an error.
### Label filtering
The following strategies can configure a [standard kubernetes labelSelector](https://kubernetes.io/docs/reference/generated/kubernetes-api/v1.22/#labelselector-v1-meta)
to filter pods by their labels:
* `PodLifeTime`
* `RemovePodsHavingTooManyRestarts`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemoveFailedPods`
This allows running strategies among pods the descheduler is interested in.
For example:
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"PodLifeTime":
enabled: true
params:
podLifeTime:
maxPodLifeTimeSeconds: 86400
````
labelSelector:
matchLabels:
component: redis
matchExpressions:
- {key: tier, operator: In, values: [cache]}
- {key: environment, operator: NotIn, values: [dev]}
```
### Node Fit filtering
The following strategies accept a `nodeFit` boolean parameter which can optimize descheduling:
* `RemoveDuplicates`
* `LowNodeUtilization`
* `HighNodeUtilization`
* `RemovePodsViolatingInterPodAntiAffinity`
* `RemovePodsViolatingNodeAffinity`
* `RemovePodsViolatingNodeTaints`
* `RemovePodsViolatingTopologySpreadConstraint`
* `RemovePodsHavingTooManyRestarts`
* `RemoveFailedPods`
If set to `true` the descheduler will consider whether or not the pods that meet eviction criteria will fit on other nodes before evicting them. If a pod cannot be rescheduled to another node, it will not be evicted. Currently the following criteria are considered when setting `nodeFit` to `true`:
- A `nodeSelector` on the pod
- Any `Tolerations` on the pod and any `Taints` on the other nodes
- `nodeAffinity` on the pod
- Whether any of the other nodes are marked as `unschedulable`
E.g.
```yaml
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"cpu" : 20
"memory": 20
"pods": 20
targetThresholds:
"cpu" : 50
"memory": 50
"pods": 50
nodeFit: true
```
Note that node fit filtering references the current pod spec, and not that of it's owner.
Thus, if the pod is owned by a ReplicationController (and that ReplicationController was modified recently),
the pod may be running with an outdated spec, which the descheduler will reference when determining node fit.
This is expected behavior as the descheduler is a "best-effort" mechanism.
Using Deployments instead of ReplicationControllers provides an automated rollout of pod spec changes, therefore ensuring that the descheduler has an up-to-date view of the cluster state.
## Pod Evictions
When the descheduler decides to evict pods from a node, it employs the following general mechanism:
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted.
* Pods (static or mirrored pods or stand alone pods) not part of an RC, RS, Deployment or Job are
* [Critical pods](https://kubernetes.io/docs/tasks/administer-cluster/guaranteed-scheduling-critical-addon-pods/) (with priorityClassName set to system-cluster-critical or system-node-critical) are never evicted (unless `evictSystemCriticalPods: true` is set).
* Pods (static or mirrored pods or stand alone pods) not part of an ReplicationController, ReplicaSet(Deployment), StatefulSet, or Job are
never evicted because these pods won't be recreated.
* Pods associated with DaemonSets are never evicted.
* Pods with local storage are never evicted.
* Best efforts pods are evicted before burstable and guaranteed pods.
* All types of pods with the annotation descheduler.alpha.kubernetes.io/evict are evicted. This
* Pods with local storage are never evicted (unless `evictLocalStoragePods: true` is set).
* Pods with PVCs are evicted (unless `ignorePvcPods: true` is set).
* In `LowNodeUtilization` and `RemovePodsViolatingInterPodAntiAffinity`, pods are evicted by their priority from low to high, and if they have same priority,
best effort pods are evicted before burstable and guaranteed pods.
* All types of pods with the annotation `descheduler.alpha.kubernetes.io/evict` are eligible for eviction. This
annotation is used to override checks which prevent eviction and users can select which pod is evicted.
Users should know how and if the pod will be recreated.
Setting `--v=4` or greater on the Descheduler will log all reasons why any pod is not evictable.
### Pod Disruption Budget (PDB)
Pods subject to a Pod Disruption Budget(PDB) are not evicted if descheduling violates its PDB. The pods
are evicted by using the eviction subresource to handle PDB.
## Compatibility Matrix
## Metrics
Descheduler | supported Kubernetes version
| name | type | description |
|-------|-------|----------------|
| build_info | gauge | constant 1 |
| pods_evicted | CounterVec | total number of pods evicted |
The metrics are served through https://localhost:10258/metrics by default.
The address and port can be changed by setting `--binding-address` and `--secure-port` flags.
## Compatibility Matrix
The below compatibility matrix shows the k8s client package(client-go, apimachinery, etc) versions that descheduler
is compiled with. At this time descheduler does not have a hard dependency to a specific k8s release. However a
particular descheduler release is only tested against the three latest k8s minor versions. For example descheduler
v0.18 should work with k8s v1.18, v1.17, and v1.16.
Starting with descheduler release v0.18 the minor version of descheduler matches the minor version of the k8s client
packages that it is compiled with.
Descheduler | Supported Kubernetes Version
-------------|-----------------------------
v0.22 | v1.22
v0.21 | v1.21
v0.20 | v1.20
v0.19 | v1.19
v0.18 | v1.18
v0.10 | v1.17
v0.4-v0.9 | v1.9+
v0.1-v0.3 | v1.7-v1.8

View File

@@ -0,0 +1,22 @@
# Patterns to ignore when building packages.
# This supports shell glob matching, relative path matching, and
# negation (prefixed with !). Only one pattern per line.
.DS_Store
# Common VCS dirs
.git/
.gitignore
.bzr/
.bzrignore
.hg/
.hgignore
.svn/
# Common backup files
*.swp
*.bak
*.tmp
*~
# Various IDEs
.project
.idea/
*.tmproj
.vscode/

View File

@@ -0,0 +1,16 @@
apiVersion: v1
name: descheduler
version: 0.22.1
appVersion: 0.22.1
description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that.
keywords:
- kubernetes
- descheduler
- kube-scheduler
home: https://github.com/kubernetes-sigs/descheduler
icon: https://kubernetes.io/images/favicon.png
sources:
- https://github.com/kubernetes-sigs/descheduler
maintainers:
- name: Kubernetes SIG Scheduling
email: kubernetes-sig-scheduling@googlegroups.com

View File

@@ -0,0 +1,70 @@
# Descheduler for Kubernetes
[Descheduler](https://github.com/kubernetes-sigs/descheduler/) for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that.
## TL;DR:
```shell
helm repo add descheduler https://kubernetes-sigs.github.io/descheduler/
helm install my-release --namespace kube-system descheduler/descheduler
```
## Introduction
This chart bootstraps a [descheduler](https://github.com/kubernetes-sigs/descheduler/) cron job on a [Kubernetes](http://kubernetes.io) cluster using the [Helm](https://helm.sh) package manager.
## Prerequisites
- Kubernetes 1.14+
## Installing the Chart
To install the chart with the release name `my-release`:
```shell
helm install --namespace kube-system my-release descheduler/descheduler
```
The command deploys _descheduler_ on the Kubernetes cluster in the default configuration. The [configuration](#configuration) section lists the parameters that can be configured during installation.
> **Tip**: List all releases using `helm list`
## Uninstalling the Chart
To uninstall/delete the `my-release` deployment:
```shell
helm delete my-release
```
The command removes all the Kubernetes components associated with the chart and deletes the release.
## Configuration
The following table lists the configurable parameters of the _descheduler_ chart and their default values.
| Parameter | Description | Default |
| ------------------------------ | --------------------------------------------------------------------------------------------------------------------- | ------------------------------------ |
| `kind` | Use as CronJob or Deployment | `CronJob` |
| `image.repository` | Docker repository to use | `k8s.gcr.io/descheduler/descheduler` |
| `image.tag` | Docker tag to use | `v[chart appVersion]` |
| `image.pullPolicy` | Docker image pull policy | `IfNotPresent` |
| `imagePullSecrets` | Docker repository secrets | `[]` |
| `nameOverride` | String to partially override `descheduler.fullname` template (will prepend the release name) | `""` |
| `fullnameOverride` | String to fully override `descheduler.fullname` template | `""` |
| `cronJobApiVersion` | CronJob API Group Version | `"batch/v1"` |
| `schedule` | The cron schedule to run the _descheduler_ job on | `"*/2 * * * *"` |
| `startingDeadlineSeconds` | If set, configure `startingDeadlineSeconds` for the _descheduler_ job | `nil` |
| `successfulJobsHistoryLimit` | If set, configure `successfulJobsHistoryLimit` for the _descheduler_ job | `nil` |
| `failedJobsHistoryLimit` | If set, configure `failedJobsHistoryLimit` for the _descheduler_ job | `nil` |
| `deschedulingInterval` | If using kind:Deployment, sets time between consecutive descheduler executions. | `5m` |
| `cmdOptions` | The options to pass to the _descheduler_ command | _see values.yaml_ |
| `deschedulerPolicy.strategies` | The _descheduler_ strategies to apply | _see values.yaml_ |
| `priorityClassName` | The name of the priority class to add to pods | `system-cluster-critical` |
| `rbac.create` | If `true`, create & use RBAC resources | `true` |
| `podSecurityPolicy.create` | If `true`, create PodSecurityPolicy | `true` |
| `resources` | Descheduler container CPU and memory requests/limits | _see values.yaml_ |
| `serviceAccount.create` | If `true`, create a service account for the cron job | `true` |
| `serviceAccount.name` | The name of the service account to use, if not set and create is true a name is generated using the fullname template | `nil` |
| `nodeSelector` | Node selectors to run the descheduler cronjob on specific nodes | `nil` |
| `tolerations` | tolerations to run the descheduler cronjob on specific nodes | `nil` |

View File

@@ -0,0 +1 @@
Descheduler installed as a cron job.

View File

@@ -0,0 +1,64 @@
{{/* vim: set filetype=mustache: */}}
{{/*
Expand the name of the chart.
*/}}
{{- define "descheduler.name" -}}
{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Create a default fully qualified app name.
We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec).
If release name contains chart name it will be used as a full name.
*/}}
{{- define "descheduler.fullname" -}}
{{- if .Values.fullnameOverride -}}
{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- $name := default .Chart.Name .Values.nameOverride -}}
{{- if contains $name .Release.Name -}}
{{- .Release.Name | trunc 63 | trimSuffix "-" -}}
{{- else -}}
{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{- end -}}
{{- end -}}
{{/*
Create chart name and version as used by the chart label.
*/}}
{{- define "descheduler.chart" -}}
{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" -}}
{{- end -}}
{{/*
Common labels
*/}}
{{- define "descheduler.labels" -}}
app.kubernetes.io/name: {{ include "descheduler.name" . }}
helm.sh/chart: {{ include "descheduler.chart" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- if .Chart.AppVersion }}
app.kubernetes.io/version: {{ .Chart.AppVersion | quote }}
{{- end }}
app.kubernetes.io/managed-by: {{ .Release.Service }}
{{- end -}}
{{/*
Selector labels
*/}}
{{- define "descheduler.selectorLabels" -}}
app.kubernetes.io/name: {{ include "descheduler.name" . }}
app.kubernetes.io/instance: {{ .Release.Name }}
{{- end -}}
{{/*
Create the name of the service account to use
*/}}
{{- define "descheduler.serviceAccountName" -}}
{{- if .Values.serviceAccount.create -}}
{{ default (include "descheduler.fullname" .) .Values.serviceAccount.name }}
{{- else -}}
{{ default "default" .Values.serviceAccount.name }}
{{- end -}}
{{- end -}}

View File

@@ -0,0 +1,34 @@
{{- if .Values.rbac.create -}}
kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: {{ template "descheduler.fullname" . }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
rules:
- apiGroups: [""]
resources: ["events"]
verbs: ["create", "update"]
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
{{- if .Values.podSecurityPolicy.create }}
- apiGroups: ['policy']
resources: ['podsecuritypolicies']
verbs: ['use']
resourceNames:
- {{ template "descheduler.fullname" . }}
{{- end }}
{{- end -}}

View File

@@ -0,0 +1,16 @@
{{- if .Values.rbac.create -}}
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: {{ template "descheduler.fullname" . }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole
name: {{ template "descheduler.fullname" . }}
subjects:
- kind: ServiceAccount
name: {{ template "descheduler.serviceAccountName" . }}
namespace: {{ .Release.Namespace }}
{{- end -}}

View File

@@ -0,0 +1,11 @@
apiVersion: v1
kind: ConfigMap
metadata:
name: {{ template "descheduler.fullname" . }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
data:
policy.yaml: |
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
{{ toYaml .Values.deschedulerPolicy | trim | indent 4 }}

View File

@@ -0,0 +1,85 @@
{{- if eq .Values.kind "CronJob" }}
apiVersion: {{ .Values.cronJobApiVersion | default "batch/v1" }}
kind: CronJob
metadata:
name: {{ template "descheduler.fullname" . }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
schedule: {{ .Values.schedule | quote }}
concurrencyPolicy: "Forbid"
{{- if .Values.startingDeadlineSeconds }}
startingDeadlineSeconds: {{ .Values.startingDeadlineSeconds }}
{{- end }}
{{- if .Values.successfulJobsHistoryLimit }}
successfulJobsHistoryLimit: {{ .Values.successfulJobsHistoryLimit }}
{{- end }}
{{- if .Values.failedJobsHistoryLimit }}
failedJobsHistoryLimit: {{ .Values.failedJobsHistoryLimit }}
{{- end }}
jobTemplate:
spec:
template:
metadata:
name: {{ template "descheduler.fullname" . }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
{{- if .Values.podAnnotations }}
{{- .Values.podAnnotations | toYaml | nindent 12 }}
{{- end }}
labels:
{{- include "descheduler.selectorLabels" . | nindent 12 }}
{{- if .Values.podLabels }}
{{- .Values.podLabels | toYaml | nindent 12 }}
{{- end }}
spec:
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 12 }}
{{- end }}
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
serviceAccountName: {{ template "descheduler.serviceAccountName" . }}
restartPolicy: "Never"
{{- with .Values.imagePullSecrets }}
imagePullSecrets:
{{- toYaml . | nindent 10 }}
{{- end }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key | quote }}
{{- if $value }}
- {{ $value | quote }}
{{- end }}
{{- end }}
resources:
{{- toYaml .Values.resources | nindent 16 }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
volumes:
- name: policy-volume
configMap:
name: {{ template "descheduler.fullname" . }}
{{- end }}

View File

@@ -0,0 +1,75 @@
{{- if eq .Values.kind "Deployment" }}
apiVersion: apps/v1
kind: Deployment
metadata:
name: {{ template "descheduler.fullname" . }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
replicas: 1
selector:
matchLabels:
{{- include "descheduler.selectorLabels" . | nindent 6 }}
template:
metadata:
labels:
{{- include "descheduler.selectorLabels" . | nindent 8 }}
{{- if .Values.podLabels }}
{{- .Values.podLabels | toYaml | nindent 8 }}
{{- end }}
annotations:
checksum/config: {{ include (print $.Template.BasePath "/configmap.yaml") . | sha256sum }}
{{- if .Values.podAnnotations }}
{{- .Values.podAnnotations | toYaml | nindent 8 }}
{{- end }}
spec:
{{- if .Values.priorityClassName }}
priorityClassName: {{ .Values.priorityClassName }}
{{- end }}
serviceAccountName: {{ template "descheduler.serviceAccountName" . }}
containers:
- name: {{ .Chart.Name }}
image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (printf "v%s" .Chart.AppVersion) }}"
imagePullPolicy: {{ .Values.image.pullPolicy }}
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--descheduling-interval"
- {{ required "deschedulingInterval required for running as Deployment" .Values.deschedulingInterval }}
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key | quote }}
{{- if $value }}
- {{ $value | quote }}
{{- end }}
{{- end }}
ports:
- containerPort: 10258
protocol: TCP
resources:
{{- toYaml .Values.resources | nindent 12 }}
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
volumes:
- name: policy-volume
configMap:
name: {{ template "descheduler.fullname" . }}
{{- with .Values.nodeSelector }}
nodeSelector:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- with .Values.tolerations }}
tolerations:
{{- toYaml . | nindent 8 }}
{{- end }}
{{- end }}

View File

@@ -0,0 +1,38 @@
{{- if .Values.podSecurityPolicy.create -}}
apiVersion: policy/v1beta1
kind: PodSecurityPolicy
metadata:
name: {{ template "descheduler.fullname" . }}
annotations:
seccomp.security.alpha.kubernetes.io/allowedProfileNames: 'docker/default,runtime/default'
seccomp.security.alpha.kubernetes.io/defaultProfileName: 'runtime/default'
spec:
privileged: false
allowPrivilegeEscalation: false
requiredDropCapabilities:
- ALL
volumes:
- 'configMap'
- 'secret'
hostNetwork: false
hostIPC: false
hostPID: false
runAsUser:
rule: 'MustRunAs'
ranges:
- min: 1
max: 65535
seLinux:
rule: 'RunAsAny'
supplementalGroups:
rule: 'MustRunAs'
ranges:
- min: 1
max: 65535
fsGroup:
rule: 'MustRunAs'
ranges:
- min: 1
max: 65535
readOnlyRootFilesystem: true
{{- end -}}

View File

@@ -0,0 +1,8 @@
{{- if .Values.serviceAccount.create -}}
apiVersion: v1
kind: ServiceAccount
metadata:
name: {{ template "descheduler.serviceAccountName" . }}
labels:
{{- include "descheduler.labels" . | nindent 4 }}
{{- end -}}

View File

@@ -0,0 +1,29 @@
apiVersion: v1
kind: Pod
metadata:
name: descheduler-test-pod
annotations:
"helm.sh/hook": test
spec:
restartPolicy: Never
serviceAccountName: descheduler-ci
containers:
- name: descheduler-test-container
image: alpine:latest
imagePullPolicy: IfNotPresent
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- All
privileged: false
runAsNonRoot: false
command: ["/bin/ash"]
args:
- -c
- >-
apk --no-cache add curl &&
curl -LO https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl &&
chmod +x ./kubectl &&
mv ./kubectl /usr/local/bin/kubectl &&
/usr/local/bin/kubectl get pods --namespace kube-system --token "$(cat /var/run/secrets/kubernetes.io/serviceaccount/token)" | grep "descheduler" | grep "Completed"

View File

@@ -0,0 +1,92 @@
# Default values for descheduler.
# This is a YAML-formatted file.
# Declare variables to be passed into your templates.
# CronJob or Deployment
kind: CronJob
image:
repository: k8s.gcr.io/descheduler/descheduler
# Overrides the image tag whose default is the chart version
tag: ""
pullPolicy: IfNotPresent
imagePullSecrets: []
resources:
requests:
cpu: 500m
memory: 256Mi
# limits:
# cpu: 100m
# memory: 128Mi
nameOverride: ""
fullnameOverride: ""
cronJobApiVersion: "batch/v1" # Use "batch/v1beta1" for k8s version < 1.21.0. TODO(@7i) remove with 1.23 release
schedule: "*/2 * * * *"
#startingDeadlineSeconds: 200
#successfulJobsHistoryLimit: 1
#failedJobsHistoryLimit: 1
# Required when running as a Deployment
deschedulingInterval: 5m
cmdOptions:
v: 3
# evict-local-storage-pods:
# max-pods-to-evict-per-node: 10
# node-selector: "key1=value1,key2=value2"
deschedulerPolicy:
strategies:
RemoveDuplicates:
enabled: true
RemovePodsViolatingNodeTaints:
enabled: true
RemovePodsViolatingNodeAffinity:
enabled: true
params:
nodeAffinityType:
- requiredDuringSchedulingIgnoredDuringExecution
RemovePodsViolatingInterPodAntiAffinity:
enabled: true
LowNodeUtilization:
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
cpu: 20
memory: 20
pods: 20
targetThresholds:
cpu: 50
memory: 50
pods: 50
priorityClassName: system-cluster-critical
nodeSelector: {}
# foo: bar
tolerations: []
# - key: 'management'
# operator: 'Equal'
# value: 'tool'
# effect: 'NoSchedule'
rbac:
# Specifies whether RBAC resources should be created
create: true
podSecurityPolicy:
# Specifies whether PodSecurityPolicy should be created.
create: true
serviceAccount:
# Specifies whether a ServiceAccount should be created
create: true
# The name of the ServiceAccount to use.
# If not set and create is true, a name is generated using the fullname template
name:

View File

@@ -14,7 +14,7 @@ steps:
- VERSION=$_GIT_TAG
- BASE_REF=$_PULL_BASE_REF
args:
- push
- push-all
substitutions:
# _GIT_TAG will be filled with a git-based tag for the image, of the form vYYYYMMDD-hash, and
# can be used as a substitution

View File

@@ -18,44 +18,80 @@ limitations under the License.
package options
import (
clientset "k8s.io/client-go/kubernetes"
"github.com/spf13/pflag"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
apiserveroptions "k8s.io/apiserver/pkg/server/options"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/component-base/logs"
// install the componentconfig api so we get its defaulting and conversion functions
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
deschedulerscheme "sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
"github.com/spf13/pflag"
const (
DefaultDeschedulerPort = 10258
)
// DeschedulerServer configuration
type DeschedulerServer struct {
componentconfig.DeschedulerConfiguration
Client clientset.Interface
Client clientset.Interface
Logs *logs.Options
SecureServing *apiserveroptions.SecureServingOptionsWithLoopback
DisableMetrics bool
}
// NewDeschedulerServer creates a new DeschedulerServer with default parameters
func NewDeschedulerServer() *DeschedulerServer {
versioned := v1alpha1.DeschedulerConfiguration{}
deschedulerscheme.Scheme.Default(&versioned)
cfg := componentconfig.DeschedulerConfiguration{}
deschedulerscheme.Scheme.Convert(versioned, &cfg, nil)
s := DeschedulerServer{
DeschedulerConfiguration: cfg,
func NewDeschedulerServer() (*DeschedulerServer, error) {
cfg, err := newDefaultComponentConfig()
if err != nil {
return nil, err
}
return &s
secureServing := apiserveroptions.NewSecureServingOptions().WithLoopback()
secureServing.BindPort = DefaultDeschedulerPort
return &DeschedulerServer{
DeschedulerConfiguration: *cfg,
Logs: logs.NewOptions(),
SecureServing: secureServing,
}, nil
}
// Validation checks for DeschedulerServer.
func (s *DeschedulerServer) Validate() error {
var errs []error
errs = append(errs, s.Logs.Validate()...)
return utilerrors.NewAggregate(errs)
}
func newDefaultComponentConfig() (*componentconfig.DeschedulerConfiguration, error) {
versionedCfg := v1alpha1.DeschedulerConfiguration{}
deschedulerscheme.Scheme.Default(&versionedCfg)
cfg := componentconfig.DeschedulerConfiguration{}
if err := deschedulerscheme.Scheme.Convert(&versionedCfg, &cfg, nil); err != nil {
return nil, err
}
return &cfg, nil
}
// AddFlags adds flags for a specific SchedulerServer to the specified FlagSet
func (rs *DeschedulerServer) AddFlags(fs *pflag.FlagSet) {
fs.StringVar(&rs.Logging.Format, "logging-format", "text", `Sets the log format. Permitted formats: "text", "json". Non-default formats don't honor these flags: --add-dir-header, --alsologtostderr, --log-backtrace-at, --log-dir, --log-file, --log-file-max-size, --logtostderr, --skip-headers, --skip-log-headers, --stderrthreshold, --log-flush-frequency.\nNon-default choices are currently alpha and subject to change without warning.`)
fs.DurationVar(&rs.DeschedulingInterval, "descheduling-interval", rs.DeschedulingInterval, "Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.")
fs.StringVar(&rs.KubeconfigFile, "kubeconfig", rs.KubeconfigFile, "File with kube configuration.")
fs.StringVar(&rs.PolicyConfigFile, "policy-config-file", rs.PolicyConfigFile, "File with descheduler policy configuration.")
fs.BoolVar(&rs.DryRun, "dry-run", rs.DryRun, "execute descheduler in dry run mode.")
// node-selector query causes descheduler to run only on nodes that matches the node labels in the query
fs.StringVar(&rs.NodeSelector, "node-selector", rs.NodeSelector, "Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)")
fs.StringVar(&rs.NodeSelector, "node-selector", rs.NodeSelector, "DEPRECATED: selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)")
// max-no-pods-to-evict limits the maximum number of pods to be evicted per node by descheduler.
fs.IntVar(&rs.MaxNoOfPodsToEvictPerNode, "max-pods-to-evict-per-node", rs.MaxNoOfPodsToEvictPerNode, "Limits the maximum number of pods to be evicted per node by descheduler")
fs.IntVar(&rs.MaxNoOfPodsToEvictPerNode, "max-pods-to-evict-per-node", rs.MaxNoOfPodsToEvictPerNode, "DEPRECATED: limits the maximum number of pods to be evicted per node by descheduler")
// evict-local-storage-pods allows eviction of pods that are using local storage. This is false by default.
fs.BoolVar(&rs.EvictLocalStoragePods, "evict-local-storage-pods", rs.EvictLocalStoragePods, "Enables evicting pods using local storage by descheduler")
fs.BoolVar(&rs.EvictLocalStoragePods, "evict-local-storage-pods", rs.EvictLocalStoragePods, "DEPRECATED: enables evicting pods using local storage by descheduler")
fs.BoolVar(&rs.DisableMetrics, "disable-metrics", rs.DisableMetrics, "Disables metrics. The metrics are by default served through https://localhost:10258/metrics. Secure address, resp. port can be changed through --bind-address, resp. --secure-port flags.")
rs.SecureServing.AddFlags(fs)
}

View File

@@ -18,6 +18,7 @@ limitations under the License.
package app
import (
"context"
"flag"
"io"
@@ -26,29 +27,61 @@ import (
"github.com/spf13/cobra"
apiserver "k8s.io/apiserver/pkg/server"
"k8s.io/apiserver/pkg/server/mux"
restclient "k8s.io/client-go/rest"
aflag "k8s.io/component-base/cli/flag"
"k8s.io/component-base/logs"
"k8s.io/klog"
"k8s.io/component-base/metrics/legacyregistry"
"k8s.io/klog/v2"
)
// NewDeschedulerCommand creates a *cobra.Command object with default parameters
func NewDeschedulerCommand(out io.Writer) *cobra.Command {
s := options.NewDeschedulerServer()
s, err := options.NewDeschedulerServer()
if err != nil {
klog.ErrorS(err, "unable to initialize server")
}
cmd := &cobra.Command{
Use: "descheduler",
Short: "descheduler",
Long: `The descheduler evicts pods which may be bound to less desired nodes`,
Run: func(cmd *cobra.Command, args []string) {
logs.InitLogs()
defer logs.FlushLogs()
s.Logs.Config.Format = s.Logging.Format
s.Logs.Apply()
// LoopbackClientConfig is a config for a privileged loopback connection
var LoopbackClientConfig *restclient.Config
var SecureServing *apiserver.SecureServingInfo
if err := s.SecureServing.ApplyTo(&SecureServing, &LoopbackClientConfig); err != nil {
klog.ErrorS(err, "failed to apply secure server configuration")
return
}
if err := s.Validate(); err != nil {
klog.ErrorS(err, "failed to validate server configuration")
return
}
if !s.DisableMetrics {
ctx := context.TODO()
pathRecorderMux := mux.NewPathRecorderMux("descheduler")
pathRecorderMux.Handle("/metrics", legacyregistry.HandlerWithReset())
if _, err := SecureServing.Serve(pathRecorderMux, 0, ctx.Done()); err != nil {
klog.Fatalf("failed to start secure server: %v", err)
return
}
}
err := Run(s)
if err != nil {
klog.Errorf("%v", err)
klog.ErrorS(err, "descheduler server")
}
},
}
cmd.SetOutput(out)
cmd.SetOut(out)
flags := cmd.Flags()
flags.SetNormalizeFunc(aflag.WordSepNormalizeFunc)
flags.AddGoFlagSet(flag.CommandLine)

View File

@@ -18,70 +18,19 @@ package app
import (
"fmt"
"runtime"
"strings"
"github.com/spf13/cobra"
"sigs.k8s.io/descheduler/pkg/version"
)
var (
// gitCommit is a constant representing the source version that
// generated this build. It should be set during build via -ldflags.
gitCommit string
// version is a constant representing the version tag that
// generated this build. It should be set during build via -ldflags.
version string
// buildDate in ISO8601 format, output of $(date -u +'%Y-%m-%dT%H:%M:%SZ')
//It should be set during build via -ldflags.
buildDate string
)
// Info holds the information related to descheduler app version.
type Info struct {
Major string `json:"major"`
Minor string `json:"minor"`
GitCommit string `json:"gitCommit"`
GitVersion string `json:"gitVersion"`
BuildDate string `json:"buildDate"`
GoVersion string `json:"goVersion"`
Compiler string `json:"compiler"`
Platform string `json:"platform"`
}
// Get returns the overall codebase version. It's for detecting
// what code a binary was built from.
func Get() Info {
majorVersion, minorVersion := splitVersion(version)
return Info{
Major: majorVersion,
Minor: minorVersion,
GitCommit: gitCommit,
GitVersion: version,
BuildDate: buildDate,
GoVersion: runtime.Version(),
Compiler: runtime.Compiler,
Platform: fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH),
}
}
func NewVersionCommand() *cobra.Command {
var versionCmd = &cobra.Command{
Use: "version",
Short: "Version of descheduler",
Long: `Prints the version of descheduler.`,
Run: func(cmd *cobra.Command, args []string) {
fmt.Printf("Descheduler version %+v\n", Get())
fmt.Printf("Descheduler version %+v\n", version.Get())
},
}
return versionCmd
}
// splitVersion splits the git version to generate major and minor versions needed.
func splitVersion(version string) (string, string) {
if version == "" {
return "", ""
}
// A sample version would be of form v0.1.0-7-ge884046, so split at first '.' and
// then return 0 and 1+(+ appended to follow semver convention) for major and minor versions.
return strings.Trim(strings.Split(version, ".")[0], "v"), strings.Split(version, ".")[1] + "+"
}

View File

@@ -17,10 +17,9 @@ limitations under the License.
package main
import (
"flag"
"fmt"
"k8s.io/component-base/logs"
"os"
"sigs.k8s.io/descheduler/cmd/descheduler/app"
)
@@ -28,7 +27,10 @@ func main() {
out := os.Stdout
cmd := app.NewDeschedulerCommand(out)
cmd.AddCommand(app.NewVersionCommand())
flag.CommandLine.Parse([]string{})
logs.InitLogs()
defer logs.FlushLogs()
if err := cmd.Execute(); err != nil {
fmt.Println(err)
os.Exit(1)

View File

@@ -3,10 +3,10 @@
## Required Tools
- [Git](https://git-scm.com/downloads)
- [Go 1.13+](https://golang.org/dl/)
- [Go 1.16+](https://golang.org/dl/)
- [Docker](https://docs.docker.com/install/)
- [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl)
- [kind](https://kind.sigs.k8s.io/)
- [kind v0.10.0+](https://kind.sigs.k8s.io/)
## Build and Run
@@ -34,9 +34,23 @@ GOOS=linux make dev-image
kind create cluster --config hack/kind_config.yaml
kind load docker-image <image name>
kind get kubeconfig > /tmp/admin.conf
export KUBECONFIG=/tmp/admin.conf
make test-unit
make test-e2e
```
## Run Helm Tests
Run the helm test for a particular descheduler release by setting below variables,
```
HELM_IMAGE_REPO="descheduler"
HELM_IMAGE_TAG="helm-test"
HELM_CHART_LOCATION="./charts/descheduler"
```
The helm tests runs as part of descheduler CI. But, to run it manually from the descheduler root,
```
make test-helm
```
### Miscellaneous
See the [hack directory](https://github.com/kubernetes-sigs/descheduler/tree/master/hack) for additional tools and scripts used for developing the descheduler.
See the [hack directory](https://github.com/kubernetes-sigs/descheduler/tree/master/hack) for additional tools and scripts used for developing the descheduler.

View File

@@ -1,32 +1,42 @@
# Release Guide
## Semi-automatic
## Container Image
### Semi-automatic
1. Make sure your repo is clean by git's standards
2. Create a release branch `git checkout -b release-1.18` (not required for patch releases)
3. Push the release branch to the descheuler repo and ensure branch protection is enabled (not required for patch releases)
4. Tag the repository and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
4. Tag the repository from the `master` branch (from the `release-1.18` branch for a patch release) and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
5. Publish a draft release using the tag you just created
6. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/master/k8s.gcr.io#image-promoter)
6. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/main/k8s.gcr.io#image-promoter)
7. Publish release
8. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
## Manual
### Manual
1. Make sure your repo is clean by git's standards
2. Create a release branch `git checkout -b release-1.18` (not required for patch releases)
3. Push the release branch to the descheuler repo and ensure branch protection is enabled (not required for patch releases)
4. Tag the repository and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
4. Tag the repository from the `master` branch (from the `release-1.18` branch for a patch release) and push the tag `VERSION=v0.18.0 git tag -m $VERSION $VERSION; git push origin $VERSION`
5. Checkout the tag you just created and make sure your repo is clean by git's standards `git checkout $VERSION`
6. Build and push the container image to the staging registry `VERSION=$VERSION make push`
6. Build and push the container image to the staging registry `VERSION=$VERSION make push-all`
7. Publish a draft release using the tag you just created
8. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/master/k8s.gcr.io#image-promoter)
8. Perform the [image promotion process](https://github.com/kubernetes/k8s.io/tree/main/k8s.gcr.io#image-promoter)
9. Publish release
10. Email `kubernetes-sig-scheduling@googlegroups.com` to announce the release
## Notes
### Notes
It's important to create the tag on the master branch after creating the release-* branch so that the [Helm releaser-action](#helm-chart) can work.
It compares the changes in the action-triggering branch to the latest tag on that branch, so if you tag before creating the new branch there
will be nothing to compare and it will fail (creating a new release branch usually involves no code changes). For this same reason, you should
also tag patch releases (on the release-* branch) *after* pushing changes (if those changes involve bumping the Helm chart version).
See [post-descheduler-push-images dashboard](https://testgrid.k8s.io/sig-scheduling#post-descheduler-push-images) for staging registry image build job status.
View the descheduler staging registry using [this URL](https://console.cloud.google.com/gcr/images/k8s-staging-descheduler/GLOBAL/descheduler) in a web browser
or use the below `gcloud` commands.
List images in staging registry.
```
gcloud container images list --repository gcr.io/k8s-staging-descheduler
@@ -46,3 +56,19 @@ Pull image from the staging registry.
```
docker pull gcr.io/k8s-staging-descheduler/descheduler:v20200206-0.9.0-94-ge2a23f284
```
## Helm Chart
Helm chart releases are managed by a separate set of git tags that are prefixed with `descheduler-helm-chart-*`. Example git tag name is `descheduler-helm-chart-0.18.0`.
Released versions of the helm charts are stored in the `gh-pages` branch of this repo. The [chart-releaser-action GitHub Action](https://github.com/helm/chart-releaser-action)
is setup to build and push the helm charts to the `gh-pages` branch when changes are pushed to a `release-*` branch.
The major and minor version of the chart matches the descheduler major and minor versions. For example descheduler helm chart version helm-descheduler-chart-0.18.0 corresponds
to descheduler version v0.18.0. The patch version of the descheduler helm chart and the patcher version of the descheduler will not necessarily match. The patch
version of the descheduler helm chart is used to version changes specific to the helm chart.
1. Merge all helm chart changes into the master branch before the release is tagged/cut
1. Ensure that `appVersion` in file `charts/descheduler/Chart.yaml` matches the descheduler version(no `v` prefix)
2. Ensure that `version` in file `charts/descheduler/Chart.yaml` has been incremented. This is the chart version.
2. Make sure your repo is clean by git's standards
3. Follow the release-branch or patch release tagging pattern from the above section.
4. Verify the new helm artifact has been successfully pushed to the `gh-pages` branch

View File

@@ -1,9 +1,25 @@
# User Guide
Starting with descheduler release v0.10.0 container images are available in these container registries.
* `asia.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
* `eu.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
* `us.gcr.io/k8s-artifacts-prod/descheduler/descheduler`
Starting with descheduler release v0.10.0 container images are available in the official k8s container registry.
Descheduler Version | Container Image | Architectures |
------------------- |-----------------------------------------------------|-------------------------|
v0.22.1 | k8s.gcr.io/descheduler/descheduler:v0.22.1 | AMD64<br>ARM64<br>ARMv7 |
v0.22.0 | k8s.gcr.io/descheduler/descheduler:v0.22.0 | AMD64<br>ARM64<br>ARMv7 |
v0.21.0 | k8s.gcr.io/descheduler/descheduler:v0.21.0 | AMD64<br>ARM64<br>ARMv7 |
v0.20.0 | k8s.gcr.io/descheduler/descheduler:v0.20.0 | AMD64<br>ARM64 |
v0.19.0 | k8s.gcr.io/descheduler/descheduler:v0.19.0 | AMD64 |
v0.18.0 | k8s.gcr.io/descheduler/descheduler:v0.18.0 | AMD64 |
v0.10.0 | k8s.gcr.io/descheduler/descheduler:v0.10.0 | AMD64 |
Note that multi-arch container images cannot be pulled by [kind](https://kind.sigs.k8s.io) from a registry. Therefore
starting with descheduler release v0.20.0 use the below process to download the official descheduler
image into a kind cluster.
```
kind create cluster
docker pull k8s.gcr.io/descheduler/descheduler:v0.20.0
kind load docker-image k8s.gcr.io/descheduler/descheduler:v0.20.0
```
## Policy Configuration Examples
The [examples](https://github.com/kubernetes-sigs/descheduler/tree/master/examples) directory has descheduler policy configuration examples.
@@ -23,11 +39,11 @@ Available Commands:
version Version of descheduler
Flags:
--add-dir-header If true, adds the file directory to the header
--add-dir-header If true, adds the file directory to the header of the log messages
--alsologtostderr log to standard error as well as files
--descheduling-interval duration Time interval between two consecutive descheduler executions. Setting this value instructs the descheduler to run in a continuous loop at the interval specified.
--dry-run execute descheduler in dry run mode.
--evict-local-storage-pods Enables evicting pods using local storage by descheduler
--evict-local-storage-pods DEPRECATED: enables evicting pods using local storage by descheduler
-h, --help help for descheduler
--kubeconfig string File with kube configuration.
--log-backtrace-at traceLocation when logging hits line file:N, emit a stack trace (default :0)
@@ -36,8 +52,8 @@ Flags:
--log-file-max-size uint Defines the maximum size a log file can grow to. Unit is megabytes. If the value is 0, the maximum file size is unlimited. (default 1800)
--log-flush-frequency duration Maximum number of seconds between log flushes (default 5s)
--logtostderr log to standard error instead of files (default true)
--max-pods-to-evict-per-node int Limits the maximum number of pods to be evicted per node by descheduler
--node-selector string Selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)
--max-pods-to-evict-per-node int DEPRECATED: limits the maximum number of pods to be evicted per node by descheduler
--node-selector string DEPRECATED: selector (label query) to filter on, supports '=', '==', and '!='.(e.g. -l key1=value1,key2=value2)
--policy-config-file string File with descheduler policy configuration.
--skip-headers If true, avoid header prefixes in the log messages
--skip-log-headers If true, avoid headers when opening log files
@@ -71,20 +87,57 @@ This policy configuration file ensures that pods created more than 7 days ago ar
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: false
"RemoveDuplicates":
enabled: false
"RemovePodsViolatingInterPodAntiAffinity":
enabled: false
"RemovePodsViolatingNodeAffinity":
enabled: false
"RemovePodsViolatingNodeTaints":
enabled: false
"RemovePodsHavingTooManyRestarts":
enabled: false
"PodLifeTime":
enabled: true
params:
maxPodLifeTimeSeconds: 604800 # pods run for a maximum of 7 days
```
### Balance Cluster By Node Memory Utilization
If your cluster has been running for a long period of time, you may find that the resource utilization is not very
balanced. The following two strategies can be used to rebalance your cluster based on `cpu`, `memory`
or `number of pods`.
#### Balance high utilization nodes
Using `LowNodeUtilization`, descheduler will rebalance the cluster based on memory by evicting pods
from nodes with memory utilization over 70% to nodes with memory utilization below 20%.
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
targetThresholds:
"memory": 70
```
#### Balance low utilization nodes
Using `HighNodeUtilization`, descheduler will rebalance the cluster based on memory by evicting pods
from nodes with memory utilization lower than 20%. This should be used along with scheduler strategy `MostRequestedPriority`.
The evicted pods will be compacted into minimal set of nodes.
```
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
```
### Autoheal Node Problems
Descheduler's `RemovePodsViolatingNodeTaints` strategy can be combined with
[Node Problem Detector](https://github.com/kubernetes/node-problem-detector/) and
[Cluster Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) to automatically remove
Nodes which have problems. Node Problem Detector can detect specific Node problems and taint any Nodes which have those
problems. The Descheduler will then deschedule workloads from those Nodes. Finally, if the descheduled Node's resource
allocation falls below the Cluster Autoscaler's scale down threshold, the Node will become a scale down candidate
and can be removed by Cluster Autoscaler. These three components form an autohealing cycle for Node problems.

14
examples/failed-pods.yaml Normal file
View File

@@ -0,0 +1,14 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemoveFailedPods":
enabled: true
params:
failedPods:
reasons:
- "OutOfcpu"
- "CreateContainerConfigError"
includingInitContainers: true
excludeOwnerKinds:
- "Job"
minPodLifeTimeSeconds: 3600 # 1 hour

View File

@@ -0,0 +1,9 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"HighNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20

View File

@@ -0,0 +1,11 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: true
params:
nodeResourceUtilizationThresholds:
thresholds:
"memory": 20
targetThresholds:
"memory": 70

View File

@@ -1,20 +1,8 @@
---
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"LowNodeUtilization":
enabled: false
"RemoveDuplicates":
enabled: false
"RemovePodsViolatingInterPodAntiAffinity":
enabled: false
"RemovePodsViolatingNodeAffinity":
enabled: false
"RemovePodsViolatingNodeTaints":
enabled: false
"RemovePodsHavingTooManyRestarts":
enabled: false
"PodLifeTime":
enabled: true
params:
maxPodLifeTimeSeconds: 604800 # 7 days
podLifeTime:
maxPodLifeTimeSeconds: 604800 # 7 days

View File

@@ -21,5 +21,9 @@ strategies:
enabled: true
params:
podsHavingTooManyRestarts:
podRestartThresholds: 100
podRestartThreshold: 100
includingInitContainers: true
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
includeSoftConstraints: true

View File

@@ -0,0 +1,7 @@
apiVersion: "descheduler/v1alpha1"
kind: "DeschedulerPolicy"
strategies:
"RemovePodsViolatingTopologySpreadConstraint":
enabled: true
params:
includeSoftConstraints: true # Include 'ScheduleAnyways' constraints

21
go.mod
View File

@@ -1,14 +1,19 @@
module sigs.k8s.io/descheduler
go 1.13
go 1.16
require (
github.com/spf13/cobra v0.0.5
github.com/client9/misspell v0.3.4
github.com/spf13/cobra v1.1.3
github.com/spf13/pflag v1.0.5
k8s.io/api v0.18.2
k8s.io/apimachinery v0.18.2
k8s.io/apiserver v0.18.2
k8s.io/client-go v0.18.2
k8s.io/component-base v0.18.2
k8s.io/klog v1.0.0
k8s.io/api v0.22.0
k8s.io/apimachinery v0.22.0
k8s.io/apiserver v0.22.0
k8s.io/client-go v0.22.0
k8s.io/code-generator v0.22.0
k8s.io/component-base v0.22.0
k8s.io/component-helpers v0.22.0
k8s.io/klog/v2 v2.9.0
k8s.io/kubectl v0.20.5
sigs.k8s.io/mdtoc v1.0.1
)

792
go.sum

File diff suppressed because it is too large Load Diff

6
hack/.spelling_failures Normal file
View File

@@ -0,0 +1,6 @@
BUILD
CHANGELOG
OWNERS
go.mod
go.sum
vendor/

View File

@@ -18,15 +18,15 @@ E2E_GCE_HOME=$DESCHEDULER_ROOT/hack/e2e-gce
create_cluster() {
echo "#################### Creating instances ##########################"
gcloud compute instances create descheduler-$master_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
gcloud compute instances create descheduler-$master_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
# Keeping the --zone here so as to make sure that e2e's can run locally.
echo "gcloud compute instances delete descheduler-$master_uuid --zone=us-east1-b --quiet" > $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node1_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
gcloud compute instances create descheduler-$node1_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node1_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node2_uuid --image="ubuntu-1604-xenial-v20180306" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-b --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
gcloud compute instances create descheduler-$node2_uuid --image-family="ubuntu-1804-lts" --image-project="ubuntu-os-cloud" --zone=us-east1-b
echo "gcloud compute instances delete descheduler-$node2_uuid --zone=us-east1-c --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
# Delete the firewall port created for master.
echo "gcloud compute firewall-rules delete kubeapiserver-$master_uuid --quiet" >> $E2E_GCE_HOME/delete_cluster.sh
@@ -44,10 +44,10 @@ generate_kubeadm_instance_files() {
transfer_install_files() {
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_install.sh descheduler-$master_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_preinstall.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c
}
@@ -55,19 +55,19 @@ install_kube() {
# Docker installation.
gcloud compute ssh descheduler-$master_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
gcloud compute ssh descheduler-$node1_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo apt-get update; sudo apt-get install -y docker.io" --zone=us-east1-c
# kubeadm installation.
# 1. Transfer files to master, nodes.
transfer_install_files
# 2. Install kubeadm.
#TODO: Add rm /tmp/kubeadm_install.sh
# Open port for kube API server
gcloud compute firewall-rules create kubeapiserver-$master_uuid --allow tcp:6443 --source-tags=descheduler-$master_uuid --source-ranges=0.0.0.0/0 --description="Opening api server port"
gcloud compute firewall-rules create kubeapiserver-$master_uuid --allow tcp:6443 --source-tags=descheduler-$master_uuid --source-ranges=0.0.0.0/0 --description="Opening api server port"
gcloud compute ssh descheduler-$master_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
kubeadm_join_command=$(gcloud compute ssh descheduler-$master_uuid --command "sudo chmod 755 /tmp/kubeadm_install.sh; sudo /tmp/kubeadm_install.sh" --zone=us-east1-b|grep 'kubeadm join')
# Copy the kubeconfig file onto /tmp for e2e tests.
# Copy the kubeconfig file onto /tmp for e2e tests.
gcloud compute ssh descheduler-$master_uuid --command "sudo cp /etc/kubernetes/admin.conf /tmp; sudo chmod 777 /tmp/admin.conf" --zone=us-east1-b
gcloud compute scp descheduler-$master_uuid:/tmp/admin.conf /tmp/admin.conf --zone=us-east1-b
@@ -75,16 +75,15 @@ install_kube() {
gcloud compute ssh descheduler-$master_uuid --command "sudo kubectl apply -f https://raw.githubusercontent.com/cloudnativelabs/kube-router/master/daemonset/kubeadm-kuberouter.yaml --kubeconfig /etc/kubernetes/admin.conf" --zone=us-east1-b
echo $kubeadm_join_command > $E2E_GCE_HOME/kubeadm_join.sh
# Copy kubeadm_join to every node.
# Copy kubeadm_join to every node.
#TODO: Put these in a loop, so that extension becomes possible.
gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node1_uuid:/tmp --zone=us-east1-b
gcloud compute ssh descheduler-$node1_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-b
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-b
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_preinstall.sh; sudo /tmp/kubeadm_preinstall.sh" --zone=us-east1-c
gcloud compute scp $E2E_GCE_HOME/kubeadm_join.sh descheduler-$node2_uuid:/tmp --zone=us-east1-c
gcloud compute ssh descheduler-$node2_uuid --command "sudo chmod 755 /tmp/kubeadm_join.sh; sudo /tmp/kubeadm_join.sh" --zone=us-east1-c
}

View File

@@ -1,6 +1,18 @@
kind: Cluster
apiVersion: kind.sigs.k8s.io/v1alpha3
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
- role: worker
kubeadmConfigPatches:
- |
kind: JoinConfiguration
nodeRegistration:
kubeletExtraArgs:
node-labels: "topology.kubernetes.io/zone=local-a"
- role: worker
kubeadmConfigPatches:
- |
kind: JoinConfiguration
nodeRegistration:
kubeletExtraArgs:
node-labels: "topology.kubernetes.io/zone=local-b"

26
hack/tools.go Normal file
View File

@@ -0,0 +1,26 @@
// +build tools
/*
Copyright 2019 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
// This package imports things required by build scripts, to force `go mod` to see them as dependencies
package tools
import (
_ "github.com/client9/misspell/cmd/misspell"
_ "k8s.io/code-generator"
_ "sigs.k8s.io/mdtoc"
)

View File

@@ -23,7 +23,7 @@ DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
GO_VERSION=($(go version))
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9|go1.10|go1.11|go1.12|go1.13') ]]; then
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.14|go1.15|go1.16') ]]; then
echo "Unknown go version '${GO_VERSION[2]}', skipping gofmt."
exit 1
fi

25
hack/update-toc.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go build -o "${OS_OUTPUT_BINPATH}/mdtoc" "sigs.k8s.io/mdtoc"
${OS_OUTPUT_BINPATH}/mdtoc --inplace README.md

36
hack/verify-conversions.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
mkdir -p "${DESCHEDULER_ROOT}/_tmp"
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/conversion-gen" "k8s.io/code-generator/cmd/conversion-gen"
${OS_OUTPUT_BINPATH}/conversion-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "./pkg/apis/componentconfig/v1alpha1,./pkg/api/v1alpha1" \
--output-file-base zz_generated.conversion
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated output differs:" >&2
echo "${_out}" >&2
echo "Generated conversions verify failed. Please run ./hack/update-generated-conversions.sh"
exit 1
fi
popd > /dev/null 2>&1
echo "Generated conversions verified."

36
hack/verify-deep-copies.sh Executable file
View File

@@ -0,0 +1,36 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
mkdir -p "${DESCHEDULER_ROOT}/_tmp"
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/deepcopy-gen" "k8s.io/code-generator/cmd/deepcopy-gen"
${OS_OUTPUT_BINPATH}/deepcopy-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "./pkg/apis/componentconfig,./pkg/apis/componentconfig/v1alpha1,./pkg/api,./pkg/api/v1alpha1" \
--output-file-base zz_generated.deepcopy
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated deep-copies output differs:" >&2
echo "${_out}" >&2
echo "Generated deep-copies verify failed. Please run ./hack/update-generated-deep-copies.sh"
exit 1
fi
popd > /dev/null 2>&1
echo "Generated deep-copies verified."

35
hack/verify-defaulters.sh Executable file
View File

@@ -0,0 +1,35 @@
#!/bin/bash
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
_tmpdir="$(mktemp -d "${DESCHEDULER_ROOT}/_tmp/kube-verify.XXXXXX")"
_deschedulertmp="${_tmpdir}"
mkdir -p "${_deschedulertmp}"
git archive --format=tar --prefix=descheduler/ "$(git write-tree)" | (cd "${_deschedulertmp}" && tar xf -)
_deschedulertmp="${_deschedulertmp}/descheduler"
pushd "${_deschedulertmp}" > /dev/null 2>&1
go build -o "${OS_OUTPUT_BINPATH}/defaulter-gen" "k8s.io/code-generator/cmd/defaulter-gen"
${OS_OUTPUT_BINPATH}/defaulter-gen \
--go-header-file "hack/boilerplate/boilerplate.go.txt" \
--input-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \
--extra-peer-dirs "${PRJ_PREFIX}/pkg/apis/componentconfig/v1alpha1,${PRJ_PREFIX}/pkg/api/v1alpha1" \
--output-file-base zz_generated.defaults
popd > /dev/null 2>&1
pushd "${DESCHEDULER_ROOT}" > /dev/null 2>&1
if ! _out="$(diff -Naupr pkg/ "${_deschedulertmp}/pkg/")"; then
echo "Generated defaulters output differs:" >&2
echo "${_out}" >&2
echo "Generated defaulters verify failed. Please run ./hack/update-generated-defaulters.sh"
fi
popd > /dev/null 2>&1
echo "Generated Defaulters verified."

View File

@@ -23,7 +23,7 @@ DESCHEDULER_ROOT=$(dirname "${BASH_SOURCE}")/..
GO_VERSION=($(go version))
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.2|go1.3|go1.4|go1.5|go1.6|go1.7|go1.8|go1.9|go1.10|go1.11|go1.12|go1.13') ]]; then
if [[ -z $(echo "${GO_VERSION[2]}" | grep -E 'go1.14|go1.15|go1.16') ]]; then
echo "Unknown go version '${GO_VERSION[2]}', skipping gofmt."
exit 1
fi

19
hack/verify-govet.sh Executable file
View File

@@ -0,0 +1,19 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go vet ${OS_ROOT}/...

41
hack/verify-spelling.sh Executable file
View File

@@ -0,0 +1,41 @@
#!/usr/bin/env bash
# Copyright 2018 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# This script checks commonly misspelled English words in all files in the
# working directory by client9/misspell package.
# Usage: `hack/verify-spelling.sh`.
set -o errexit
set -o nounset
set -o pipefail
KUBE_ROOT=$(dirname "${BASH_SOURCE[0]}")/..
export KUBE_ROOT
source "${KUBE_ROOT}/hack/lib/init.sh"
# Ensure that we find the binaries we build before anything else.
export GOBIN="${OS_OUTPUT_BINPATH}"
PATH="${GOBIN}:${PATH}"
# Install tools we need
pushd "${KUBE_ROOT}" >/dev/null
GO111MODULE=on go install github.com/client9/misspell/cmd/misspell
popd >/dev/null
# Spell checking
# All the skipping files are defined in hack/.spelling_failures
skipping_file="${KUBE_ROOT}/hack/.spelling_failures"
failing_packages=$(sed "s| | -e |g" "${skipping_file}")
git ls-files | grep -v -e "${failing_packages}" | xargs misspell -i "Creater,creater,ect" -error -o stderr

29
hack/verify-toc.sh Executable file
View File

@@ -0,0 +1,29 @@
#!/bin/bash
# Copyright 2021 The Kubernetes Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
set -o errexit
set -o nounset
set -o pipefail
source "$(dirname "${BASH_SOURCE}")/lib/init.sh"
go build -o "${OS_OUTPUT_BINPATH}/mdtoc" "sigs.k8s.io/mdtoc"
if ! ${OS_OUTPUT_BINPATH}/mdtoc --inplace --dryrun README.md
then
echo "ERROR: Changes detected to table of contents. Run ./hack/update-toc.sh" >&2
exit 1
fi

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- configmap.yaml
- rbac.yaml

View File

@@ -3,7 +3,6 @@ kind: ClusterRole
apiVersion: rbac.authorization.k8s.io/v1
metadata:
name: descheduler-cluster-role
namespace: kube-system
rules:
- apiGroups: [""]
resources: ["events"]
@@ -11,12 +10,18 @@ rules:
- apiGroups: [""]
resources: ["nodes"]
verbs: ["get", "watch", "list"]
- apiGroups: [""]
resources: ["namespaces"]
verbs: ["get", "list"]
- apiGroups: [""]
resources: ["pods"]
verbs: ["get", "watch", "list", "delete"]
- apiGroups: [""]
resources: ["pods/eviction"]
verbs: ["create"]
- apiGroups: ["scheduling.k8s.io"]
resources: ["priorityclasses"]
verbs: ["get", "watch", "list"]
---
apiVersion: v1
kind: ServiceAccount
@@ -28,7 +33,6 @@ apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRoleBinding
metadata:
name: descheduler-cluster-role-binding
namespace: kube-system
roleRef:
apiGroup: rbac.authorization.k8s.io
kind: ClusterRole

View File

@@ -1,5 +1,5 @@
---
apiVersion: batch/v1beta1
apiVersion: batch/v1 # for k8s version < 1.21.0, use batch/v1beta1
kind: CronJob
metadata:
name: descheduler-cronjob
@@ -16,7 +16,7 @@ spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler:v0.10.0
image: k8s.gcr.io/descheduler/descheduler:v0.22.1
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
@@ -27,6 +27,18 @@ spec:
- "/policy-dir/policy.yaml"
- "--v"
- "3"
resources:
requests:
cpu: "500m"
memory: "256Mi"
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
restartPolicy: "Never"
serviceAccountName: descheduler-sa
volumes:

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- cronjob.yaml

View File

@@ -0,0 +1,54 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: descheduler
namespace: kube-system
labels:
app: descheduler
spec:
replicas: 1
selector:
matchLabels:
app: descheduler
template:
metadata:
labels:
app: descheduler
spec:
priorityClassName: system-cluster-critical
serviceAccountName: descheduler-sa
containers:
- name: descheduler
image: k8s.gcr.io/descheduler/descheduler:v0.22.1
imagePullPolicy: IfNotPresent
command:
- "/bin/descheduler"
args:
- "--policy-config-file"
- "/policy-dir/policy.yaml"
- "--descheduling-interval"
- "5m"
- "--v"
- "3"
ports:
- containerPort: 10258
protocol: TCP
resources:
requests:
cpu: 500m
memory: 256Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
volumes:
- name: policy-volume
configMap:
name: descheduler-policy-configmap

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- deployment.yaml

View File

@@ -14,7 +14,7 @@ spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: us.gcr.io/k8s-artifacts-prod/descheduler/descheduler:v0.10.0
image: k8s.gcr.io/descheduler/descheduler:v0.22.1
volumeMounts:
- mountPath: /policy-dir
name: policy-volume
@@ -25,6 +25,18 @@ spec:
- "/policy-dir/policy.yaml"
- "--v"
- "3"
resources:
requests:
cpu: "500m"
memory: "256Mi"
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
privileged: false
readOnlyRootFilesystem: true
runAsNonRoot: true
restartPolicy: "Never"
serviceAccountName: descheduler-sa
volumes:

View File

@@ -0,0 +1,6 @@
apiVersion: kustomize.config.k8s.io/v1beta1
kind: Kustomization
resources:
- ../base
- job.yaml

72
metrics/metrics.go Normal file
View File

@@ -0,0 +1,72 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metrics
import (
"sync"
"k8s.io/component-base/metrics"
"k8s.io/component-base/metrics/legacyregistry"
"sigs.k8s.io/descheduler/pkg/version"
)
const (
// DeschedulerSubsystem - subsystem name used by descheduler
DeschedulerSubsystem = "descheduler"
)
var (
PodsEvicted = metrics.NewCounterVec(
&metrics.CounterOpts{
Subsystem: DeschedulerSubsystem,
Name: "pods_evicted",
Help: "Number of evicted pods, by the result, by the strategy, by the namespace. 'failed' result means a pod could not be evicted",
StabilityLevel: metrics.ALPHA,
}, []string{"result", "strategy", "namespace"})
buildInfo = metrics.NewGauge(
&metrics.GaugeOpts{
Subsystem: DeschedulerSubsystem,
Name: "build_info",
Help: "Build info about descheduler, including Go version, Descheduler version, Git SHA, Git branch",
ConstLabels: map[string]string{"GoVersion": version.Get().GoVersion, "DeschedulerVersion": version.Get().GitVersion, "GitBranch": version.Get().GitBranch, "GitSha1": version.Get().GitSha1},
StabilityLevel: metrics.ALPHA,
},
)
metricsList = []metrics.Registerable{
PodsEvicted,
buildInfo,
}
)
var registerMetrics sync.Once
// Register all metrics.
func Register() {
// Register the metrics.
registerMetrics.Do(func() {
RegisterMetrics(metricsList...)
})
}
// RegisterMetrics registers a list of metrics.
func RegisterMetrics(extraMetrics ...metrics.Registerable) {
for _, metric := range extraMetrics {
legacyregistry.MustRegister(metric)
}
}

View File

@@ -19,8 +19,6 @@ package api
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
var (
@@ -35,12 +33,6 @@ const GroupName = "descheduler"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
func init() {
if err := addKnownTypes(scheme.Scheme); err != nil {
panic(err)
}
}
// Kind takes an unqualified kind and returns a Group qualified GroupKind
func Kind(kind string) schema.GroupKind {
return SchemeGroupVersion.WithKind(kind).GroupKind()

View File

@@ -28,6 +28,21 @@ type DeschedulerPolicy struct {
// Strategies
Strategies StrategyList
// NodeSelector for a set of nodes to operate over
NodeSelector *string
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
EvictSystemCriticalPods *bool
// IgnorePVCPods prevents pods with PVCs from being evicted.
IgnorePVCPods *bool
// MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node.
MaxNoOfPodsToEvictPerNode *int
}
type StrategyName string
@@ -41,16 +56,32 @@ type DeschedulerStrategy struct {
Weight int
// Strategy parameters
Params StrategyParameters
Params *StrategyParameters
}
// Only one of its members may be specified
// Namespaces carries a list of included/excluded namespaces
// for which a given strategy is applicable
type Namespaces struct {
Include []string
Exclude []string
}
// Besides Namespaces only one of its members may be specified
// TODO(jchaloup): move Namespaces ThresholdPriority and ThresholdPriorityClassName to individual strategies
// once the policy version is bumped to v1alpha2
type StrategyParameters struct {
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds
NodeAffinityType []string
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts
MaxPodLifeTimeSeconds *uint
PodLifeTime *PodLifeTime
RemoveDuplicates *RemoveDuplicates
FailedPods *FailedPods
IncludeSoftConstraints bool
Namespaces *Namespaces
ThresholdPriority *int32
ThresholdPriorityClassName string
LabelSelector *metav1.LabelSelector
NodeFit bool
}
type Percentage float64
@@ -70,3 +101,15 @@ type PodsHavingTooManyRestarts struct {
type RemoveDuplicates struct {
ExcludeOwnerKinds []string
}
type PodLifeTime struct {
MaxPodLifeTimeSeconds *uint
PodStatusPhases []string
}
type FailedPods struct {
ExcludeOwnerKinds []string
MinPodLifetimeSeconds *uint
Reasons []string
IncludingInitContainers bool
}

View File

@@ -28,6 +28,21 @@ type DeschedulerPolicy struct {
// Strategies
Strategies StrategyList `json:"strategies,omitempty"`
// NodeSelector for a set of nodes to operate over
NodeSelector *string `json:"nodeSelector,omitempty"`
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods *bool `json:"evictLocalStoragePods,omitempty"`
// EvictSystemCriticalPods allows eviction of pods of any priority (including Kubernetes system pods)
EvictSystemCriticalPods *bool `json:"evictSystemCriticalPods,omitempty"`
// IgnorePVCPods prevents pods with PVCs from being evicted.
IgnorePVCPods *bool `json:"ignorePvcPods,omitempty"`
// MaxNoOfPodsToEvictPerNode restricts maximum of pods to be evicted per node.
MaxNoOfPodsToEvictPerNode *int `json:"maxNoOfPodsToEvictPerNode,omitempty"`
}
type StrategyName string
@@ -41,16 +56,30 @@ type DeschedulerStrategy struct {
Weight int `json:"weight,omitempty"`
// Strategy parameters
Params StrategyParameters `json:"params,omitempty"`
Params *StrategyParameters `json:"params,omitempty"`
}
// Only one of its members may be specified
// Namespaces carries a list of included/excluded namespaces
// for which a given strategy is applicable.
type Namespaces struct {
Include []string `json:"include"`
Exclude []string `json:"exclude"`
}
// Besides Namespaces ThresholdPriority and ThresholdPriorityClassName only one of its members may be specified
type StrategyParameters struct {
NodeResourceUtilizationThresholds *NodeResourceUtilizationThresholds `json:"nodeResourceUtilizationThresholds,omitempty"`
NodeAffinityType []string `json:"nodeAffinityType,omitempty"`
PodsHavingTooManyRestarts *PodsHavingTooManyRestarts `json:"podsHavingTooManyRestarts,omitempty"`
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
PodLifeTime *PodLifeTime `json:"podLifeTime,omitempty"`
RemoveDuplicates *RemoveDuplicates `json:"removeDuplicates,omitempty"`
FailedPods *FailedPods `json:"failedPods,omitempty"`
IncludeSoftConstraints bool `json:"includeSoftConstraints"`
Namespaces *Namespaces `json:"namespaces"`
ThresholdPriority *int32 `json:"thresholdPriority"`
ThresholdPriorityClassName string `json:"thresholdPriorityClassName"`
LabelSelector *metav1.LabelSelector `json:"labelSelector"`
NodeFit bool `json:"nodeFit"`
}
type Percentage float64
@@ -70,3 +99,15 @@ type PodsHavingTooManyRestarts struct {
type RemoveDuplicates struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
}
type PodLifeTime struct {
MaxPodLifeTimeSeconds *uint `json:"maxPodLifeTimeSeconds,omitempty"`
PodStatusPhases []string `json:"podStatusPhases,omitempty"`
}
type FailedPods struct {
ExcludeOwnerKinds []string `json:"excludeOwnerKinds,omitempty"`
MinPodLifetimeSeconds *uint `json:"minPodLifetimeSeconds,omitempty"`
Reasons []string `json:"reasons,omitempty"`
IncludingInitContainers bool `json:"includingInitContainers,omitempty"`
}

View File

@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -23,6 +23,7 @@ package v1alpha1
import (
unsafe "unsafe"
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
conversion "k8s.io/apimachinery/pkg/conversion"
runtime "k8s.io/apimachinery/pkg/runtime"
api "sigs.k8s.io/descheduler/pkg/api"
@@ -55,6 +56,26 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*FailedPods)(nil), (*api.FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_FailedPods_To_api_FailedPods(a.(*FailedPods), b.(*api.FailedPods), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.FailedPods)(nil), (*FailedPods)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_FailedPods_To_v1alpha1_FailedPods(a.(*api.FailedPods), b.(*FailedPods), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*Namespaces)(nil), (*api.Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_Namespaces_To_api_Namespaces(a.(*Namespaces), b.(*api.Namespaces), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.Namespaces)(nil), (*Namespaces)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_Namespaces_To_v1alpha1_Namespaces(a.(*api.Namespaces), b.(*Namespaces), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*NodeResourceUtilizationThresholds)(nil), (*api.NodeResourceUtilizationThresholds)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(a.(*NodeResourceUtilizationThresholds), b.(*api.NodeResourceUtilizationThresholds), scope)
}); err != nil {
@@ -65,6 +86,16 @@ func RegisterConversions(s *runtime.Scheme) error {
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*PodLifeTime)(nil), (*api.PodLifeTime)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime(a.(*PodLifeTime), b.(*api.PodLifeTime), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*api.PodLifeTime)(nil), (*PodLifeTime)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime(a.(*api.PodLifeTime), b.(*PodLifeTime), scope)
}); err != nil {
return err
}
if err := s.AddGeneratedConversionFunc((*PodsHavingTooManyRestarts)(nil), (*api.PodsHavingTooManyRestarts)(nil), func(a, b interface{}, scope conversion.Scope) error {
return Convert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(a.(*PodsHavingTooManyRestarts), b.(*api.PodsHavingTooManyRestarts), scope)
}); err != nil {
@@ -100,6 +131,11 @@ func RegisterConversions(s *runtime.Scheme) error {
func autoConvert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *DeschedulerPolicy, out *api.DeschedulerPolicy, s conversion.Scope) error {
out.Strategies = *(*api.StrategyList)(unsafe.Pointer(&in.Strategies))
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
out.MaxNoOfPodsToEvictPerNode = (*int)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNode))
return nil
}
@@ -110,6 +146,11 @@ func Convert_v1alpha1_DeschedulerPolicy_To_api_DeschedulerPolicy(in *Descheduler
func autoConvert_api_DeschedulerPolicy_To_v1alpha1_DeschedulerPolicy(in *api.DeschedulerPolicy, out *DeschedulerPolicy, s conversion.Scope) error {
out.Strategies = *(*StrategyList)(unsafe.Pointer(&in.Strategies))
out.NodeSelector = (*string)(unsafe.Pointer(in.NodeSelector))
out.EvictLocalStoragePods = (*bool)(unsafe.Pointer(in.EvictLocalStoragePods))
out.EvictSystemCriticalPods = (*bool)(unsafe.Pointer(in.EvictSystemCriticalPods))
out.IgnorePVCPods = (*bool)(unsafe.Pointer(in.IgnorePVCPods))
out.MaxNoOfPodsToEvictPerNode = (*int)(unsafe.Pointer(in.MaxNoOfPodsToEvictPerNode))
return nil
}
@@ -121,9 +162,7 @@ func Convert_api_DeschedulerPolicy_To_v1alpha1_DeschedulerPolicy(in *api.Desched
func autoConvert_v1alpha1_DeschedulerStrategy_To_api_DeschedulerStrategy(in *DeschedulerStrategy, out *api.DeschedulerStrategy, s conversion.Scope) error {
out.Enabled = in.Enabled
out.Weight = in.Weight
if err := Convert_v1alpha1_StrategyParameters_To_api_StrategyParameters(&in.Params, &out.Params, s); err != nil {
return err
}
out.Params = (*api.StrategyParameters)(unsafe.Pointer(in.Params))
return nil
}
@@ -135,9 +174,7 @@ func Convert_v1alpha1_DeschedulerStrategy_To_api_DeschedulerStrategy(in *Desched
func autoConvert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in *api.DeschedulerStrategy, out *DeschedulerStrategy, s conversion.Scope) error {
out.Enabled = in.Enabled
out.Weight = in.Weight
if err := Convert_api_StrategyParameters_To_v1alpha1_StrategyParameters(&in.Params, &out.Params, s); err != nil {
return err
}
out.Params = (*StrategyParameters)(unsafe.Pointer(in.Params))
return nil
}
@@ -146,6 +183,54 @@ func Convert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in *api.Des
return autoConvert_api_DeschedulerStrategy_To_v1alpha1_DeschedulerStrategy(in, out, s)
}
func autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
out.IncludingInitContainers = in.IncludingInitContainers
return nil
}
// Convert_v1alpha1_FailedPods_To_api_FailedPods is an autogenerated conversion function.
func Convert_v1alpha1_FailedPods_To_api_FailedPods(in *FailedPods, out *api.FailedPods, s conversion.Scope) error {
return autoConvert_v1alpha1_FailedPods_To_api_FailedPods(in, out, s)
}
func autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
out.ExcludeOwnerKinds = *(*[]string)(unsafe.Pointer(&in.ExcludeOwnerKinds))
out.MinPodLifetimeSeconds = (*uint)(unsafe.Pointer(in.MinPodLifetimeSeconds))
out.Reasons = *(*[]string)(unsafe.Pointer(&in.Reasons))
out.IncludingInitContainers = in.IncludingInitContainers
return nil
}
// Convert_api_FailedPods_To_v1alpha1_FailedPods is an autogenerated conversion function.
func Convert_api_FailedPods_To_v1alpha1_FailedPods(in *api.FailedPods, out *FailedPods, s conversion.Scope) error {
return autoConvert_api_FailedPods_To_v1alpha1_FailedPods(in, out, s)
}
func autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
return nil
}
// Convert_v1alpha1_Namespaces_To_api_Namespaces is an autogenerated conversion function.
func Convert_v1alpha1_Namespaces_To_api_Namespaces(in *Namespaces, out *api.Namespaces, s conversion.Scope) error {
return autoConvert_v1alpha1_Namespaces_To_api_Namespaces(in, out, s)
}
func autoConvert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error {
out.Include = *(*[]string)(unsafe.Pointer(&in.Include))
out.Exclude = *(*[]string)(unsafe.Pointer(&in.Exclude))
return nil
}
// Convert_api_Namespaces_To_v1alpha1_Namespaces is an autogenerated conversion function.
func Convert_api_Namespaces_To_v1alpha1_Namespaces(in *api.Namespaces, out *Namespaces, s conversion.Scope) error {
return autoConvert_api_Namespaces_To_v1alpha1_Namespaces(in, out, s)
}
func autoConvert_v1alpha1_NodeResourceUtilizationThresholds_To_api_NodeResourceUtilizationThresholds(in *NodeResourceUtilizationThresholds, out *api.NodeResourceUtilizationThresholds, s conversion.Scope) error {
out.Thresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.Thresholds))
out.TargetThresholds = *(*api.ResourceThresholds)(unsafe.Pointer(&in.TargetThresholds))
@@ -170,6 +255,28 @@ func Convert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtili
return autoConvert_api_NodeResourceUtilizationThresholds_To_v1alpha1_NodeResourceUtilizationThresholds(in, out, s)
}
func autoConvert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in *PodLifeTime, out *api.PodLifeTime, s conversion.Scope) error {
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.PodStatusPhases = *(*[]string)(unsafe.Pointer(&in.PodStatusPhases))
return nil
}
// Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime is an autogenerated conversion function.
func Convert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in *PodLifeTime, out *api.PodLifeTime, s conversion.Scope) error {
return autoConvert_v1alpha1_PodLifeTime_To_api_PodLifeTime(in, out, s)
}
func autoConvert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in *api.PodLifeTime, out *PodLifeTime, s conversion.Scope) error {
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.PodStatusPhases = *(*[]string)(unsafe.Pointer(&in.PodStatusPhases))
return nil
}
// Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime is an autogenerated conversion function.
func Convert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in *api.PodLifeTime, out *PodLifeTime, s conversion.Scope) error {
return autoConvert_api_PodLifeTime_To_v1alpha1_PodLifeTime(in, out, s)
}
func autoConvert_v1alpha1_PodsHavingTooManyRestarts_To_api_PodsHavingTooManyRestarts(in *PodsHavingTooManyRestarts, out *api.PodsHavingTooManyRestarts, s conversion.Scope) error {
out.PodRestartThreshold = in.PodRestartThreshold
out.IncludingInitContainers = in.IncludingInitContainers
@@ -216,8 +323,15 @@ func autoConvert_v1alpha1_StrategyParameters_To_api_StrategyParameters(in *Strat
out.NodeResourceUtilizationThresholds = (*api.NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
out.PodsHavingTooManyRestarts = (*api.PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.PodLifeTime = (*api.PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
out.RemoveDuplicates = (*api.RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
out.FailedPods = (*api.FailedPods)(unsafe.Pointer(in.FailedPods))
out.IncludeSoftConstraints = in.IncludeSoftConstraints
out.Namespaces = (*api.Namespaces)(unsafe.Pointer(in.Namespaces))
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
out.LabelSelector = (*v1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
out.NodeFit = in.NodeFit
return nil
}
@@ -230,8 +344,15 @@ func autoConvert_api_StrategyParameters_To_v1alpha1_StrategyParameters(in *api.S
out.NodeResourceUtilizationThresholds = (*NodeResourceUtilizationThresholds)(unsafe.Pointer(in.NodeResourceUtilizationThresholds))
out.NodeAffinityType = *(*[]string)(unsafe.Pointer(&in.NodeAffinityType))
out.PodsHavingTooManyRestarts = (*PodsHavingTooManyRestarts)(unsafe.Pointer(in.PodsHavingTooManyRestarts))
out.MaxPodLifeTimeSeconds = (*uint)(unsafe.Pointer(in.MaxPodLifeTimeSeconds))
out.PodLifeTime = (*PodLifeTime)(unsafe.Pointer(in.PodLifeTime))
out.RemoveDuplicates = (*RemoveDuplicates)(unsafe.Pointer(in.RemoveDuplicates))
out.FailedPods = (*FailedPods)(unsafe.Pointer(in.FailedPods))
out.IncludeSoftConstraints = in.IncludeSoftConstraints
out.Namespaces = (*Namespaces)(unsafe.Pointer(in.Namespaces))
out.ThresholdPriority = (*int32)(unsafe.Pointer(in.ThresholdPriority))
out.ThresholdPriorityClassName = in.ThresholdPriorityClassName
out.LabelSelector = (*v1.LabelSelector)(unsafe.Pointer(in.LabelSelector))
out.NodeFit = in.NodeFit
return nil
}

View File

@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@ limitations under the License.
package v1alpha1
import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
@@ -35,6 +36,31 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
(*out)[key] = *val.DeepCopy()
}
}
if in.NodeSelector != nil {
in, out := &in.NodeSelector, &out.NodeSelector
*out = new(string)
**out = **in
}
if in.EvictLocalStoragePods != nil {
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
*out = new(bool)
**out = **in
}
if in.EvictSystemCriticalPods != nil {
in, out := &in.EvictSystemCriticalPods, &out.EvictSystemCriticalPods
*out = new(bool)
**out = **in
}
if in.IgnorePVCPods != nil {
in, out := &in.IgnorePVCPods, &out.IgnorePVCPods
*out = new(bool)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(int)
**out = **in
}
return
}
@@ -59,7 +85,11 @@ func (in *DeschedulerPolicy) DeepCopyObject() runtime.Object {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DeschedulerStrategy) DeepCopyInto(out *DeschedulerStrategy) {
*out = *in
in.Params.DeepCopyInto(&out.Params)
if in.Params != nil {
in, out := &in.Params, &out.Params
*out = new(StrategyParameters)
(*in).DeepCopyInto(*out)
}
return
}
@@ -73,6 +103,63 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FailedPods) DeepCopyInto(out *FailedPods) {
*out = *in
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.MinPodLifetimeSeconds != nil {
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
*out = new(uint)
**out = **in
}
if in.Reasons != nil {
in, out := &in.Reasons, &out.Reasons
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
func (in *FailedPods) DeepCopy() *FailedPods {
if in == nil {
return nil
}
out := new(FailedPods)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Namespaces) DeepCopyInto(out *Namespaces) {
*out = *in
if in.Include != nil {
in, out := &in.Include, &out.Include
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.Exclude != nil {
in, out := &in.Exclude, &out.Exclude
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces.
func (in *Namespaces) DeepCopy() *Namespaces {
if in == nil {
return nil
}
out := new(Namespaces)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeResourceUtilizationThresholds) DeepCopyInto(out *NodeResourceUtilizationThresholds) {
*out = *in
@@ -103,6 +190,32 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) {
*out = *in
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.PodStatusPhases != nil {
in, out := &in.PodStatusPhases, &out.PodStatusPhases
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTime.
func (in *PodLifeTime) DeepCopy() *PodLifeTime {
if in == nil {
return nil
}
out := new(PodLifeTime)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
*out = *in
@@ -202,16 +315,36 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = new(PodsHavingTooManyRestarts)
**out = **in
}
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
if in.PodLifeTime != nil {
in, out := &in.PodLifeTime, &out.PodLifeTime
*out = new(PodLifeTime)
(*in).DeepCopyInto(*out)
}
if in.RemoveDuplicates != nil {
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
*out = new(RemoveDuplicates)
(*in).DeepCopyInto(*out)
}
if in.FailedPods != nil {
in, out := &in.FailedPods, &out.FailedPods
*out = new(FailedPods)
(*in).DeepCopyInto(*out)
}
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(Namespaces)
(*in).DeepCopyInto(*out)
}
if in.ThresholdPriority != nil {
in, out := &in.ThresholdPriority, &out.ThresholdPriority
*out = new(int32)
**out = **in
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
return
}

View File

@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@ limitations under the License.
package api
import (
v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
runtime "k8s.io/apimachinery/pkg/runtime"
)
@@ -35,6 +36,31 @@ func (in *DeschedulerPolicy) DeepCopyInto(out *DeschedulerPolicy) {
(*out)[key] = *val.DeepCopy()
}
}
if in.NodeSelector != nil {
in, out := &in.NodeSelector, &out.NodeSelector
*out = new(string)
**out = **in
}
if in.EvictLocalStoragePods != nil {
in, out := &in.EvictLocalStoragePods, &out.EvictLocalStoragePods
*out = new(bool)
**out = **in
}
if in.EvictSystemCriticalPods != nil {
in, out := &in.EvictSystemCriticalPods, &out.EvictSystemCriticalPods
*out = new(bool)
**out = **in
}
if in.IgnorePVCPods != nil {
in, out := &in.IgnorePVCPods, &out.IgnorePVCPods
*out = new(bool)
**out = **in
}
if in.MaxNoOfPodsToEvictPerNode != nil {
in, out := &in.MaxNoOfPodsToEvictPerNode, &out.MaxNoOfPodsToEvictPerNode
*out = new(int)
**out = **in
}
return
}
@@ -59,7 +85,11 @@ func (in *DeschedulerPolicy) DeepCopyObject() runtime.Object {
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *DeschedulerStrategy) DeepCopyInto(out *DeschedulerStrategy) {
*out = *in
in.Params.DeepCopyInto(&out.Params)
if in.Params != nil {
in, out := &in.Params, &out.Params
*out = new(StrategyParameters)
(*in).DeepCopyInto(*out)
}
return
}
@@ -73,6 +103,63 @@ func (in *DeschedulerStrategy) DeepCopy() *DeschedulerStrategy {
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *FailedPods) DeepCopyInto(out *FailedPods) {
*out = *in
if in.ExcludeOwnerKinds != nil {
in, out := &in.ExcludeOwnerKinds, &out.ExcludeOwnerKinds
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.MinPodLifetimeSeconds != nil {
in, out := &in.MinPodLifetimeSeconds, &out.MinPodLifetimeSeconds
*out = new(uint)
**out = **in
}
if in.Reasons != nil {
in, out := &in.Reasons, &out.Reasons
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new FailedPods.
func (in *FailedPods) DeepCopy() *FailedPods {
if in == nil {
return nil
}
out := new(FailedPods)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *Namespaces) DeepCopyInto(out *Namespaces) {
*out = *in
if in.Include != nil {
in, out := &in.Include, &out.Include
*out = make([]string, len(*in))
copy(*out, *in)
}
if in.Exclude != nil {
in, out := &in.Exclude, &out.Exclude
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Namespaces.
func (in *Namespaces) DeepCopy() *Namespaces {
if in == nil {
return nil
}
out := new(Namespaces)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *NodeResourceUtilizationThresholds) DeepCopyInto(out *NodeResourceUtilizationThresholds) {
*out = *in
@@ -103,6 +190,32 @@ func (in *NodeResourceUtilizationThresholds) DeepCopy() *NodeResourceUtilization
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodLifeTime) DeepCopyInto(out *PodLifeTime) {
*out = *in
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
}
if in.PodStatusPhases != nil {
in, out := &in.PodStatusPhases, &out.PodStatusPhases
*out = make([]string, len(*in))
copy(*out, *in)
}
return
}
// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PodLifeTime.
func (in *PodLifeTime) DeepCopy() *PodLifeTime {
if in == nil {
return nil
}
out := new(PodLifeTime)
in.DeepCopyInto(out)
return out
}
// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil.
func (in *PodsHavingTooManyRestarts) DeepCopyInto(out *PodsHavingTooManyRestarts) {
*out = *in
@@ -202,16 +315,36 @@ func (in *StrategyParameters) DeepCopyInto(out *StrategyParameters) {
*out = new(PodsHavingTooManyRestarts)
**out = **in
}
if in.MaxPodLifeTimeSeconds != nil {
in, out := &in.MaxPodLifeTimeSeconds, &out.MaxPodLifeTimeSeconds
*out = new(uint)
**out = **in
if in.PodLifeTime != nil {
in, out := &in.PodLifeTime, &out.PodLifeTime
*out = new(PodLifeTime)
(*in).DeepCopyInto(*out)
}
if in.RemoveDuplicates != nil {
in, out := &in.RemoveDuplicates, &out.RemoveDuplicates
*out = new(RemoveDuplicates)
(*in).DeepCopyInto(*out)
}
if in.FailedPods != nil {
in, out := &in.FailedPods, &out.FailedPods
*out = new(FailedPods)
(*in).DeepCopyInto(*out)
}
if in.Namespaces != nil {
in, out := &in.Namespaces, &out.Namespaces
*out = new(Namespaces)
(*in).DeepCopyInto(*out)
}
if in.ThresholdPriority != nil {
in, out := &in.ThresholdPriority, &out.ThresholdPriority
*out = new(int32)
**out = **in
}
if in.LabelSelector != nil {
in, out := &in.LabelSelector, &out.LabelSelector
*out = new(v1.LabelSelector)
(*in).DeepCopyInto(*out)
}
return
}

View File

@@ -19,8 +19,6 @@ package componentconfig
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
"sigs.k8s.io/descheduler/pkg/descheduler/scheme"
)
var (
@@ -34,12 +32,6 @@ const GroupName = "deschedulercomponentconfig"
// SchemeGroupVersion is group version used to register these objects
var SchemeGroupVersion = schema.GroupVersion{Group: GroupName, Version: runtime.APIVersionInternal}
func init() {
if err := addKnownTypes(scheme.Scheme); err != nil {
panic(err)
}
}
// Kind takes an unqualified kind and returns a Group qualified GroupKind
func Kind(kind string) schema.GroupKind {
return SchemeGroupVersion.WithKind(kind).GroupKind()

View File

@@ -20,6 +20,7 @@ import (
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
componentbaseconfig "k8s.io/component-base/config"
)
// +k8s:deepcopy-gen:interfaces=k8s.io/apimachinery/pkg/runtime.Object
@@ -48,4 +49,11 @@ type DeschedulerConfiguration struct {
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods bool
// IgnorePVCPods sets whether PVC pods should be allowed to be evicted
IgnorePVCPods bool
// Logging specifies the options of logging.
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information.
Logging componentbaseconfig.LoggingConfiguration
}

View File

@@ -17,6 +17,7 @@ limitations under the License.
package v1alpha1
import (
componentbaseconfig "k8s.io/component-base/config"
"time"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -48,4 +49,11 @@ type DeschedulerConfiguration struct {
// EvictLocalStoragePods allows pods using local storage to be evicted.
EvictLocalStoragePods bool `json:"evictLocalStoragePods,omitempty"`
// IgnorePVCPods sets whether PVC pods should be allowed to be evicted
IgnorePVCPods bool `json:"ignorePvcPods,omitempty"`
// Logging specifies the options of logging.
// Refer [Logs Options](https://github.com/kubernetes/component-base/blob/master/logs/options.go) for more information.
Logging componentbaseconfig.LoggingConfiguration `json:"logging,omitempty"`
}

View File

@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -56,6 +56,8 @@ func autoConvert_v1alpha1_DeschedulerConfiguration_To_componentconfig_Deschedule
out.NodeSelector = in.NodeSelector
out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode
out.EvictLocalStoragePods = in.EvictLocalStoragePods
out.IgnorePVCPods = in.IgnorePVCPods
out.Logging = in.Logging
return nil
}
@@ -72,6 +74,8 @@ func autoConvert_componentconfig_DeschedulerConfiguration_To_v1alpha1_Deschedule
out.NodeSelector = in.NodeSelector
out.MaxNoOfPodsToEvictPerNode = in.MaxNoOfPodsToEvictPerNode
out.EvictLocalStoragePods = in.EvictLocalStoragePods
out.IgnorePVCPods = in.IgnorePVCPods
out.Logging = in.Logging
return nil
}

View File

@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@ import (
func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
out.Logging = in.Logging
return
}

View File

@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.

View File

@@ -1,7 +1,7 @@
// +build !ignore_autogenerated
/*
Copyright 2020 The Kubernetes Authors.
Copyright 2022 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
@@ -28,6 +28,7 @@ import (
func (in *DeschedulerConfiguration) DeepCopyInto(out *DeschedulerConfiguration) {
*out = *in
out.TypeMeta = in.TypeMeta
out.Logging = in.Logging
return
}

View File

@@ -19,14 +19,16 @@ package descheduler
import (
"context"
"fmt"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/nodeutilization"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
"k8s.io/klog/v2"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/informers"
"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
"sigs.k8s.io/descheduler/metrics"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/client"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -36,6 +38,8 @@ import (
)
func Run(rs *options.DeschedulerServer) error {
metrics.Register()
ctx := context.Background()
rsclient, err := client.CreateClient(rs.KubeconfigFile)
if err != nil {
@@ -60,7 +64,7 @@ func Run(rs *options.DeschedulerServer) error {
return RunDeschedulerStrategies(ctx, rs, deschedulerPolicy, evictionPolicyGroupVersion, stopChannel)
}
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor)
type strategyFunction func(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor)
func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, stopChannel chan struct{}) error {
sharedInformerFactory := informers.NewSharedInformerFactory(rs.Client, 0)
@@ -69,26 +73,57 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
sharedInformerFactory.Start(stopChannel)
sharedInformerFactory.WaitForCacheSync(stopChannel)
strategyFuncs := map[string]strategyFunction{
"RemoveDuplicates": strategies.RemoveDuplicatePods,
"LowNodeUtilization": strategies.LowNodeUtilization,
"RemovePodsViolatingInterPodAntiAffinity": strategies.RemovePodsViolatingInterPodAntiAffinity,
"RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity,
"RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints,
"RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts,
"PodLifeTime": strategies.PodLifeTime,
strategyFuncs := map[api.StrategyName]strategyFunction{
"RemoveDuplicates": strategies.RemoveDuplicatePods,
"LowNodeUtilization": nodeutilization.LowNodeUtilization,
"HighNodeUtilization": nodeutilization.HighNodeUtilization,
"RemovePodsViolatingInterPodAntiAffinity": strategies.RemovePodsViolatingInterPodAntiAffinity,
"RemovePodsViolatingNodeAffinity": strategies.RemovePodsViolatingNodeAffinity,
"RemovePodsViolatingNodeTaints": strategies.RemovePodsViolatingNodeTaints,
"RemovePodsHavingTooManyRestarts": strategies.RemovePodsHavingTooManyRestarts,
"PodLifeTime": strategies.PodLifeTime,
"RemovePodsViolatingTopologySpreadConstraint": strategies.RemovePodsViolatingTopologySpreadConstraint,
"RemoveFailedPods": strategies.RemoveFailedPods,
}
nodeSelector := rs.NodeSelector
if deschedulerPolicy.NodeSelector != nil {
nodeSelector = *deschedulerPolicy.NodeSelector
}
evictLocalStoragePods := rs.EvictLocalStoragePods
if deschedulerPolicy.EvictLocalStoragePods != nil {
evictLocalStoragePods = *deschedulerPolicy.EvictLocalStoragePods
}
evictSystemCriticalPods := false
if deschedulerPolicy.EvictSystemCriticalPods != nil {
evictSystemCriticalPods = *deschedulerPolicy.EvictSystemCriticalPods
if evictSystemCriticalPods {
klog.V(1).InfoS("Warning: EvictSystemCriticalPods is set to True. This could cause eviction of Kubernetes system pods.")
}
}
ignorePvcPods := false
if deschedulerPolicy.IgnorePVCPods != nil {
ignorePvcPods = *deschedulerPolicy.IgnorePVCPods
}
maxNoOfPodsToEvictPerNode := rs.MaxNoOfPodsToEvictPerNode
if deschedulerPolicy.MaxNoOfPodsToEvictPerNode != nil {
maxNoOfPodsToEvictPerNode = *deschedulerPolicy.MaxNoOfPodsToEvictPerNode
}
wait.Until(func() {
nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, rs.NodeSelector, stopChannel)
nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, nodeInformer, nodeSelector)
if err != nil {
klog.V(1).Infof("Unable to get ready nodes: %v", err)
klog.V(1).InfoS("Unable to get ready nodes", "err", err)
close(stopChannel)
return
}
if len(nodes) <= 1 {
klog.V(1).Infof("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
klog.V(1).InfoS("The cluster size is 0 or 1 meaning eviction causes service disruption or degradation. So aborting..")
close(stopChannel)
return
}
@@ -97,16 +132,25 @@ func RunDeschedulerStrategies(ctx context.Context, rs *options.DeschedulerServer
rs.Client,
evictionPolicyGroupVersion,
rs.DryRun,
rs.MaxNoOfPodsToEvictPerNode,
maxNoOfPodsToEvictPerNode,
nodes,
evictLocalStoragePods,
evictSystemCriticalPods,
ignorePvcPods,
)
for name, f := range strategyFuncs {
if strategy := deschedulerPolicy.Strategies[api.StrategyName(name)]; strategy.Enabled {
f(ctx, rs.Client, strategy, nodes, rs.EvictLocalStoragePods, podEvictor)
for name, strategy := range deschedulerPolicy.Strategies {
if f, ok := strategyFuncs[name]; ok {
if strategy.Enabled {
f(ctx, rs.Client, strategy, nodes, podEvictor)
}
} else {
klog.ErrorS(fmt.Errorf("unknown strategy name"), "skipping strategy", "strategy", name)
}
}
klog.V(1).InfoS("Number of evicted pods", "totalEvicted", podEvictor.TotalEvicted())
// If there was no interval specified, send a signal to the stopChannel to end the wait.Until loop after 1 iteration
if rs.DeschedulingInterval.Seconds() == 0 {
close(stopChannel)

View File

@@ -7,7 +7,7 @@ import (
"testing"
"time"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
fakeclientset "k8s.io/client-go/kubernetes/fake"
@@ -38,18 +38,27 @@ func TestTaintsUpdated(t *testing.T) {
stopChannel := make(chan struct{})
defer close(stopChannel)
rs := options.NewDeschedulerServer()
rs, err := options.NewDeschedulerServer()
if err != nil {
t.Fatalf("Unable to initialize server: %v", err)
}
rs.Client = client
rs.DeschedulingInterval = 100 * time.Millisecond
errChan := make(chan error, 1)
defer close(errChan)
go func() {
err := RunDeschedulerStrategies(ctx, rs, dp, "v1beta1", stopChannel)
errChan <- err
}()
select {
case err := <-errChan:
if err != nil {
t.Fatalf("Unable to run descheduler strategies: %v", err)
}
}()
case <-time.After(300 * time.Millisecond):
// Wait for few cycles and then verify the only pod still exists
}
// Wait for few cycles and then verify the only pod still exists
time.Sleep(300 * time.Millisecond)
pods, err := client.CoreV1().Pods(p1.Namespace).List(ctx, metav1.ListOptions{})
if err != nil {
t.Errorf("Unable to list pods: %v", err)

View File

@@ -19,37 +19,55 @@ package evictions
import (
"context"
"fmt"
"strings"
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1beta1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/errors"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/client-go/kubernetes/scheme"
clientcorev1 "k8s.io/client-go/kubernetes/typed/core/v1"
"k8s.io/client-go/tools/record"
"k8s.io/klog"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/metrics"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
eutils "sigs.k8s.io/descheduler/pkg/descheduler/evictions/utils"
)
const (
evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict"
)
// nodePodEvictedCount keeps count of pods evicted on node
type nodePodEvictedCount map[*v1.Node]int
type PodEvictor struct {
client clientset.Interface
policyGroupVersion string
dryRun bool
maxPodsToEvict int
nodepodCount nodePodEvictedCount
client clientset.Interface
nodes []*v1.Node
policyGroupVersion string
dryRun bool
maxPodsToEvictPerNode int
nodepodCount nodePodEvictedCount
evictLocalStoragePods bool
evictSystemCriticalPods bool
ignorePvcPods bool
}
func NewPodEvictor(
client clientset.Interface,
policyGroupVersion string,
dryRun bool,
maxPodsToEvict int,
maxPodsToEvictPerNode int,
nodes []*v1.Node,
evictLocalStoragePods bool,
evictSystemCriticalPods bool,
ignorePvcPods bool,
) *PodEvictor {
var nodePodCount = make(nodePodEvictedCount)
for _, node := range nodes {
@@ -58,11 +76,15 @@ func NewPodEvictor(
}
return &PodEvictor{
client: client,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvict: maxPodsToEvict,
nodepodCount: nodePodCount,
client: client,
nodes: nodes,
policyGroupVersion: policyGroupVersion,
dryRun: dryRun,
maxPodsToEvictPerNode: maxPodsToEvictPerNode,
nodepodCount: nodePodCount,
evictLocalStoragePods: evictLocalStoragePods,
evictSystemCriticalPods: evictSystemCriticalPods,
ignorePvcPods: ignorePvcPods,
}
}
@@ -81,25 +103,37 @@ func (pe *PodEvictor) TotalEvicted() int {
}
// EvictPod returns non-nil error only when evicting a pod on a node is not
// possible (due to maxPodsToEvict constraint). Success is true when the pod
// possible (due to maxPodsToEvictPerNode constraint). Success is true when the pod
// is evicted on the server side.
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node) (bool, error) {
if pe.maxPodsToEvict > 0 && pe.nodepodCount[node]+1 > pe.maxPodsToEvict {
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", pe.maxPodsToEvict, node.Name)
func (pe *PodEvictor) EvictPod(ctx context.Context, pod *v1.Pod, node *v1.Node, strategy string, reasons ...string) (bool, error) {
reason := strategy
if len(reasons) > 0 {
reason += " (" + strings.Join(reasons, ", ") + ")"
}
if pe.maxPodsToEvictPerNode > 0 && pe.nodepodCount[node]+1 > pe.maxPodsToEvictPerNode {
metrics.PodsEvicted.With(map[string]string{"result": "maximum number reached", "strategy": strategy, "namespace": pod.Namespace}).Inc()
return false, fmt.Errorf("Maximum number %v of evicted pods per %q node reached", pe.maxPodsToEvictPerNode, node.Name)
}
err := evictPod(ctx, pe.client, pod, pe.policyGroupVersion, pe.dryRun)
if err != nil {
// err is used only for logging purposes
klog.Errorf("Error evicting pod: %#v in namespace %#v (%#v)", pod.Name, pod.Namespace, err)
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod), "reason", reason)
metrics.PodsEvicted.With(map[string]string{"result": "error", "strategy": strategy, "namespace": pod.Namespace}).Inc()
return false, nil
}
pe.nodepodCount[node]++
if pe.dryRun {
klog.V(1).Infof("Evicted pod in dry run mode: %#v in namespace %#v", pod.Name, pod.Namespace)
klog.V(1).InfoS("Evicted pod in dry run mode", "pod", klog.KObj(pod), "reason", reason)
} else {
klog.V(1).Infof("Evicted pod: %#v in namespace %#v", pod.Name, pod.Namespace)
klog.V(1).InfoS("Evicted pod", "pod", klog.KObj(pod), "reason", reason)
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartStructuredLogging(3)
eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: pe.client.CoreV1().Events(pod.Namespace)})
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
r.Event(pod, v1.EventTypeNormal, "Descheduled", fmt.Sprintf("pod evicted by sigs.k8s.io/descheduler%s", reason))
metrics.PodsEvicted.With(map[string]string{"result": "success", "strategy": strategy, "namespace": pod.Namespace}).Inc()
}
return true, nil
}
@@ -123,14 +157,6 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
}
err := client.PolicyV1beta1().Evictions(eviction.Namespace).Evict(ctx, eviction)
if err == nil {
eventBroadcaster := record.NewBroadcaster()
eventBroadcaster.StartLogging(klog.V(3).Infof)
eventBroadcaster.StartRecordingToSink(&clientcorev1.EventSinkImpl{Interface: client.CoreV1().Events(pod.Namespace)})
r := eventBroadcaster.NewRecorder(scheme.Scheme, v1.EventSource{Component: "sigs.k8s.io.descheduler"})
r.Event(pod, v1.EventTypeNormal, "Descheduled", "pod evicted by sigs.k8s.io/descheduler")
return nil
}
if apierrors.IsTooManyRequests(err) {
return fmt.Errorf("error when evicting pod (ignoring) %q: %v", pod.Name, err)
}
@@ -139,3 +165,152 @@ func evictPod(ctx context.Context, client clientset.Interface, pod *v1.Pod, poli
}
return err
}
type Options struct {
priority *int32
nodeFit bool
labelSelector labels.Selector
}
// WithPriorityThreshold sets a threshold for pod's priority class.
// Any pod whose priority class is lower is evictable.
func WithPriorityThreshold(priority int32) func(opts *Options) {
return func(opts *Options) {
var p int32 = priority
opts.priority = &p
}
}
// WithNodeFit sets whether or not to consider taints, node selectors,
// and pod affinity when evicting. A pod whose tolerations, node selectors,
// and affinity match a node other than the one it is currently running on
// is evictable.
func WithNodeFit(nodeFit bool) func(opts *Options) {
return func(opts *Options) {
opts.nodeFit = nodeFit
}
}
// WithLabelSelector sets whether or not to apply label filtering when evicting.
// Any pod matching the label selector is considered evictable.
func WithLabelSelector(labelSelector labels.Selector) func(opts *Options) {
return func(opts *Options) {
opts.labelSelector = labelSelector
}
}
type constraint func(pod *v1.Pod) error
type evictable struct {
constraints []constraint
}
// Evictable provides an implementation of IsEvictable(IsEvictable(pod *v1.Pod) bool).
// The method accepts a list of options which allow to extend constraints
// which decides when a pod is considered evictable.
func (pe *PodEvictor) Evictable(opts ...func(opts *Options)) *evictable {
options := &Options{}
for _, opt := range opts {
opt(options)
}
ev := &evictable{}
if !pe.evictSystemCriticalPods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
// Moved from IsEvictable function to allow for disabling
if utils.IsCriticalPriorityPod(pod) {
return fmt.Errorf("pod has system critical priority")
}
return nil
})
if options.priority != nil {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if IsPodEvictableBasedOnPriority(pod, *options.priority) {
return nil
}
return fmt.Errorf("pod has higher priority than specified priority class threshold")
})
}
}
if !pe.evictLocalStoragePods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if utils.IsPodWithLocalStorage(pod) {
return fmt.Errorf("pod has local storage and descheduler is not configured with evictLocalStoragePods")
}
return nil
})
}
if pe.ignorePvcPods {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if utils.IsPodWithPVC(pod) {
return fmt.Errorf("pod has a PVC and descheduler is configured to ignore PVC pods")
}
return nil
})
}
if options.nodeFit {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if !nodeutil.PodFitsAnyOtherNode(pod, pe.nodes) {
return fmt.Errorf("pod does not fit on any other node because of nodeSelector(s), Taint(s), or nodes marked as unschedulable")
}
return nil
})
}
if options.labelSelector != nil && !options.labelSelector.Empty() {
ev.constraints = append(ev.constraints, func(pod *v1.Pod) error {
if !options.labelSelector.Matches(labels.Set(pod.Labels)) {
return fmt.Errorf("pod labels do not match the labelSelector filter in the policy parameter")
}
return nil
})
}
return ev
}
// IsEvictable decides when a pod is evictable
func (ev *evictable) IsEvictable(pod *v1.Pod) bool {
checkErrs := []error{}
ownerRefList := podutil.OwnerRef(pod)
if utils.IsDaemonsetPod(ownerRefList) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a DaemonSet pod"))
}
if len(ownerRefList) == 0 {
checkErrs = append(checkErrs, fmt.Errorf("pod does not have any ownerrefs"))
}
if utils.IsMirrorPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a mirror pod"))
}
if utils.IsStaticPod(pod) {
checkErrs = append(checkErrs, fmt.Errorf("pod is a static pod"))
}
for _, c := range ev.constraints {
if err := c(pod); err != nil {
checkErrs = append(checkErrs, err)
}
}
if len(checkErrs) > 0 && !HaveEvictAnnotation(pod) {
klog.V(4).InfoS("Pod lacks an eviction annotation and fails the following checks", "pod", klog.KObj(pod), "checks", errors.NewAggregate(checkErrs).Error())
return false
}
return true
}
// HaveEvictAnnotation checks if the pod have evict annotation
func HaveEvictAnnotation(pod *v1.Pod) bool {
_, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey]
return found
}
// IsPodEvictableBasedOnPriority checks if the given pod is evictable based on priority resolved from pod Spec.
func IsPodEvictableBasedOnPriority(pod *v1.Pod, priority int32) bool {
return pod.Spec.Priority == nil || *pod.Spec.Priority < priority
}

View File

@@ -21,9 +21,12 @@ import (
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
@@ -65,3 +68,416 @@ func TestEvictPod(t *testing.T) {
}
}
}
func TestIsEvictable(t *testing.T) {
n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil)
lowPriority := int32(800)
highPriority := int32(900)
nodeTaintKey := "hardware"
nodeTaintValue := "gpu"
nodeLabelKey := "datacenter"
nodeLabelValue := "east"
type testCase struct {
pod *v1.Pod
nodes []*v1.Node
runBefore func(*v1.Pod, []*v1.Node)
evictLocalStoragePods bool
evictSystemCriticalPods bool
priorityThreshold *int32
nodeFit bool
result bool
}
testCases := []testCase{
{ // Normal pod eviction with normal ownerRefs
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with normal ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with replicaSet ownerRefs
pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with replicaSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with statefulSet ownerRefs
pod: test.BuildTestPod("p18", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Normal pod eviction with statefulSet ownerRefs and descheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p19", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetStatefulSetOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted because it is bound to a PV and evictLocalStoragePods = false
pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = true
pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
},
evictLocalStoragePods: true,
evictSystemCriticalPods: false,
result: true,
}, { // Pod is evicted because it is bound to a PV and evictLocalStoragePods = false, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted becasuse it is part of a daemonSet
pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted becasuse it is part of a daemonSet, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted becasuse it is a mirror pod
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted becasuse it is a mirror pod, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = test.GetMirrorPodAnnotation()
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted becasuse it has system critical priority
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: false,
}, { // Pod is evicted becasuse it has system critical priority, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/evict": "true",
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
result: true,
}, { // Pod not evicted becasuse it has a priority higher than the configured priority threshold
pod: test.BuildTestPod("p14", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
priorityThreshold: &lowPriority,
result: false,
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p15", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
result: true,
}, { // Pod is evicted becasuse it has system critical priority, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p16", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
result: true,
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod is evicted becasuse it has a priority higher than the configured priority threshold, but evictSystemCriticalPods = true and it has scheduler.alpha.kubernetes.io/evict annotation
pod: test.BuildTestPod("p17", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.Spec.Priority = &highPriority
},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
priorityThreshold: &lowPriority,
result: true,
}, { // Pod with no tolerations running on normal node, all other nodes tainted
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, { // Pod with correct tolerations running on normal node, all other nodes tainted
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.Tolerations = []v1.Toleration{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.Spec.Taints = []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
}, { // Pod with incorrect node selector
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: "fail",
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: false,
}, { // Pod with correct node selector
pod: test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
pod.Spec.NodeSelector = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}),
nodes: []*v1.Node{test.BuildTestNode("node2", 1000, 2000, 13, nil), test.BuildTestNode("node3", 1000, 2000, 13, nil)},
runBefore: func(pod *v1.Pod, nodes []*v1.Node) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
for _, node := range nodes {
node.ObjectMeta.Labels = map[string]string{
nodeLabelKey: nodeLabelValue,
}
}
},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
nodeFit: true,
result: true,
},
}
for _, test := range testCases {
test.runBefore(test.pod, test.nodes)
nodes := append(test.nodes, n1)
podEvictor := &PodEvictor{
evictLocalStoragePods: test.evictLocalStoragePods,
evictSystemCriticalPods: test.evictSystemCriticalPods,
nodes: nodes,
}
evictable := podEvictor.Evictable()
var opts []func(opts *Options)
if test.priorityThreshold != nil {
opts = append(opts, WithPriorityThreshold(*test.priorityThreshold))
}
if test.nodeFit {
opts = append(opts, WithNodeFit(true))
}
evictable = podEvictor.Evictable(opts...)
result := evictable.IsEvictable(test.pod)
if result != test.result {
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
}
}
}
func TestPodTypes(t *testing.T) {
n1 := test.BuildTestNode("node1", 1000, 2000, 9, nil)
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, nil)
// These won't be evicted.
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil)
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil)
p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil)
p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
// The following 4 pods won't get evicted.
// A daemonset.
//p2.Annotations = test.GetDaemonSetAnnotation()
p2.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
// A pod with local storage.
p3.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p3.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
p4.Annotations = test.GetMirrorPodAnnotation()
if !utils.IsMirrorPod(p4) {
t.Errorf("Expected p4 to be a mirror pod.")
}
if !utils.IsPodWithLocalStorage(p3) {
t.Errorf("Expected p3 to be a pod with local storage.")
}
ownerRefList := podutil.OwnerRef(p2)
if !utils.IsDaemonsetPod(ownerRefList) {
t.Errorf("Expected p2 to be a daemonset pod.")
}
ownerRefList = podutil.OwnerRef(p1)
if utils.IsDaemonsetPod(ownerRefList) || utils.IsPodWithLocalStorage(p1) || utils.IsCriticalPriorityPod(p1) || utils.IsMirrorPod(p1) || utils.IsStaticPod(p1) {
t.Errorf("Expected p1 to be a normal pod.")
}
}

View File

@@ -18,18 +18,19 @@ package node
import (
"context"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
coreinformers "k8s.io/client-go/informers/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/utils"
)
// ReadyNodes returns ready nodes irrespective of whether they are
// schedulable or not.
func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeSelector string, stopChannel <-chan struct{}) ([]*v1.Node, error) {
func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer coreinformers.NodeInformer, nodeSelector string) ([]*v1.Node, error) {
ns, err := labels.Parse(nodeSelector)
if err != nil {
return []*v1.Node{}, err
@@ -42,7 +43,7 @@ func ReadyNodes(ctx context.Context, client clientset.Interface, nodeInformer co
}
if len(nodes) == 0 {
klog.V(2).Infof("node lister returned empty list, now fetch directly")
klog.V(2).InfoS("Node lister returned empty list, now fetch directly")
nItems, err := client.CoreV1().Nodes().List(ctx, metav1.ListOptions{LabelSelector: nodeSelector})
if err != nil {
@@ -77,25 +78,56 @@ func IsReady(node *v1.Node) bool {
// - NodeOutOfDisk condition status is ConditionFalse,
// - NodeNetworkUnavailable condition status is ConditionFalse.
if cond.Type == v1.NodeReady && cond.Status != v1.ConditionTrue {
klog.V(1).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
klog.V(1).InfoS("Ignoring node", "node", klog.KObj(node), "condition", cond.Type, "status", cond.Status)
return false
} /*else if cond.Type == v1.NodeOutOfDisk && cond.Status != v1.ConditionFalse {
klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
klog.V(4).InfoS("Ignoring node with condition status", "node", klog.KObj(node.Name), "condition", cond.Type, "status", cond.Status)
return false
} else if cond.Type == v1.NodeNetworkUnavailable && cond.Status != v1.ConditionFalse {
klog.V(4).Infof("Ignoring node %v with %v condition status %v", node.Name, cond.Type, cond.Status)
klog.V(4).InfoS("Ignoring node with condition status", "node", klog.KObj(node.Name), "condition", cond.Type, "status", cond.Status)
return false
}*/
}
// Ignore nodes that are marked unschedulable
/*if node.Spec.Unschedulable {
klog.V(4).Infof("Ignoring node %v since it is unschedulable", node.Name)
klog.V(4).InfoS("Ignoring node since it is unschedulable", "node", klog.KObj(node.Name))
return false
}*/
return true
}
// IsNodeUnschedulable checks if the node is unschedulable. This is helper function to check only in case of
// PodFitsAnyOtherNode checks if the given pod fits any of the given nodes, besides the node
// the pod is already running on. The node fit is based on multiple criteria, like, pod node selector
// matching the node label (including affinity), the taints on the node, and the node being schedulable or not.
func PodFitsAnyOtherNode(pod *v1.Pod, nodes []*v1.Node) bool {
for _, node := range nodes {
// Skip node pod is already on
if node.Name == pod.Spec.NodeName {
continue
}
// Check node selector and required affinity
ok, err := utils.PodMatchNodeSelector(pod, node)
if err != nil || !ok {
continue
}
// Check taints (we only care about NoSchedule and NoExecute taints)
ok = utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
})
if !ok {
continue
}
// Check if node is schedulable
if !IsNodeUnschedulable(node) {
klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
}
}
return false
}
// IsNodeUnschedulable checks if the node is unschedulable. This is a helper function to check only in case of
// underutilized node so that they won't be accounted for.
func IsNodeUnschedulable(node *v1.Node) bool {
return node.Spec.Unschedulable
@@ -112,7 +144,7 @@ func PodFitsAnyNode(pod *v1.Pod, nodes []*v1.Node) bool {
continue
}
if !IsNodeUnschedulable(node) {
klog.V(2).Infof("Pod %v can possibly be scheduled on %v", pod.Name, node.Name)
klog.V(2).InfoS("Pod can possibly be scheduled on a different node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
}
}
@@ -125,15 +157,15 @@ func PodFitsCurrentNode(pod *v1.Pod, node *v1.Node) bool {
ok, err := utils.PodMatchNodeSelector(pod, node)
if err != nil {
klog.Error(err)
klog.ErrorS(err, "Failed to match node selector")
return false
}
if !ok {
klog.V(1).Infof("Pod %v does not fit on node %v", pod.Name, node.Name)
klog.V(2).InfoS("Pod does not fit on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return false
}
klog.V(3).Infof("Pod %v fits on node %v", pod.Name, node.Name)
klog.V(2).InfoS("Pod fits on node", "pod", klog.KObj(pod), "node", klog.KObj(node))
return true
}

View File

@@ -20,7 +20,7 @@ import (
"context"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
@@ -69,12 +69,12 @@ func TestReadyNodesWithNodeSelector(t *testing.T) {
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
nodeInformer := sharedInformerFactory.Core().V1().Nodes()
stopChannel := make(chan struct{}, 0)
stopChannel := make(chan struct{})
sharedInformerFactory.Start(stopChannel)
sharedInformerFactory.WaitForCacheSync(stopChannel)
defer close(stopChannel)
nodes, _ := ReadyNodes(ctx, fakeClient, nodeInformer, nodeSelector, nil)
nodes, _ := ReadyNodes(ctx, fakeClient, nodeInformer, nodeSelector)
if nodes[0].Name != "node1" {
t.Errorf("Expected node1, got %s", nodes[0].Name)
@@ -200,10 +200,15 @@ func TestPodFitsCurrentNode(t *testing.T) {
}
}
func TestPodFitsAnyNode(t *testing.T) {
func TestPodFitsAnyOtherNode(t *testing.T) {
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
nodeTaintKey := "hardware"
nodeTaintValue := "gpu"
// Staging node has no scheduling restrictions, but the pod always starts here and PodFitsAnyOtherNode() doesn't take into account the node the pod is running on.
nodeNames := []string{"node1", "node2", "stagingNode"}
tests := []struct {
description string
@@ -212,33 +217,12 @@ func TestPodFitsAnyNode(t *testing.T) {
success bool
}{
{
description: "Pod expected to fit one of the nodes",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
description: "Pod fits another node matching node affinity",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
@@ -246,42 +230,27 @@ func TestPodFitsAnyNode(t *testing.T) {
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: true,
},
{
description: "Pod expected to fit one of the nodes (error node first)",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
description: "Pod expected to fit one of the nodes",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: "no",
},
@@ -289,42 +258,27 @@ func TestPodFitsAnyNode(t *testing.T) {
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: true,
},
{
description: "Pod expected to fit none of the nodes",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: "unfit1",
},
@@ -332,42 +286,27 @@ func TestPodFitsAnyNode(t *testing.T) {
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "unfit2",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: false,
},
{
description: "Nodes are unschedulable but labels match, should fail",
pod: &v1.Pod{
Spec: v1.PodSpec{
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeLabelKey,
Operator: "In",
Values: []string{
nodeLabelValue,
},
},
},
},
},
},
},
},
},
},
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
@@ -378,20 +317,136 @@ func TestPodFitsAnyNode(t *testing.T) {
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: "no",
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: false,
},
{
description: "Two nodes matches node selector, one of them is tained, should pass",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: true,
},
{
description: "Both nodes are tained, should fail",
pod: createPodManifest(nodeNames[2], nodeLabelKey, nodeLabelValue),
nodes: []*v1.Node{
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[0],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoSchedule,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[1],
Labels: map[string]string{
nodeLabelKey: nodeLabelValue,
},
},
Spec: v1.NodeSpec{
Taints: []v1.Taint{
{
Key: nodeTaintKey,
Value: nodeTaintValue,
Effect: v1.TaintEffectNoExecute,
},
},
},
},
{
ObjectMeta: metav1.ObjectMeta{
Name: nodeNames[2],
},
},
},
success: false,
},
}
for _, tc := range tests {
actual := PodFitsAnyNode(tc.pod, tc.nodes)
actual := PodFitsAnyOtherNode(tc.pod, tc.nodes)
if actual != tc.success {
t.Errorf("Test %#v failed", tc.description)
}
}
}
func createPodManifest(nodeName string, nodeSelectorKey string, nodeSelectorValue string) *v1.Pod {
return (&v1.Pod{
Spec: v1.PodSpec{
NodeName: nodeName,
Affinity: &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeSelectorKey,
Operator: "In",
Values: []string{
nodeSelectorValue,
},
},
},
},
},
},
},
},
},
})
}

View File

@@ -18,6 +18,8 @@ package pod
import (
"context"
"sort"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/fields"
@@ -25,57 +27,146 @@ import (
"sigs.k8s.io/descheduler/pkg/utils"
)
const (
evictPodAnnotationKey = "descheduler.alpha.kubernetes.io/evict"
)
// IsEvictable checks if a pod is evictable or not.
func IsEvictable(pod *v1.Pod, evictLocalStoragePods bool) bool {
ownerRefList := OwnerRef(pod)
if !HaveEvictAnnotation(pod) && (IsMirrorPod(pod) || (!evictLocalStoragePods && IsPodWithLocalStorage(pod)) || len(ownerRefList) == 0 || IsDaemonsetPod(ownerRefList) || IsCriticalPod(pod)) {
return false
}
return true
type Options struct {
filter func(pod *v1.Pod) bool
includedNamespaces []string
excludedNamespaces []string
labelSelector *metav1.LabelSelector
}
// ListEvictablePodsOnNode returns the list of evictable pods on node.
func ListEvictablePodsOnNode(ctx context.Context, client clientset.Interface, node *v1.Node, evictLocalStoragePods bool) ([]*v1.Pod, error) {
pods, err := ListPodsOnANode(ctx, client, node)
if err != nil {
return []*v1.Pod{}, err
// WithFilter sets a pod filter.
// The filter function should return true if the pod should be returned from ListPodsOnANode
func WithFilter(filter func(pod *v1.Pod) bool) func(opts *Options) {
return func(opts *Options) {
opts.filter = filter
}
evictablePods := make([]*v1.Pod, 0)
for _, pod := range pods {
if !IsEvictable(pod, evictLocalStoragePods) {
continue
} else {
evictablePods = append(evictablePods, pod)
}
}
return evictablePods, nil
}
func ListPodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node) ([]*v1.Pod, error) {
fieldSelector, err := fields.ParseSelector("spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed))
if err != nil {
return []*v1.Pod{}, err
// WithNamespaces sets included namespaces
func WithNamespaces(namespaces []string) func(opts *Options) {
return func(opts *Options) {
opts.includedNamespaces = namespaces
}
}
podList, err := client.CoreV1().Pods(v1.NamespaceAll).List(ctx,
metav1.ListOptions{FieldSelector: fieldSelector.String()})
if err != nil {
return []*v1.Pod{}, err
// WithoutNamespaces sets excluded namespaces
func WithoutNamespaces(namespaces []string) func(opts *Options) {
return func(opts *Options) {
opts.excludedNamespaces = namespaces
}
}
// WithLabelSelector sets a pod label selector
func WithLabelSelector(labelSelector *metav1.LabelSelector) func(opts *Options) {
return func(opts *Options) {
opts.labelSelector = labelSelector
}
}
// ListPodsOnANode lists all of the pods on a node
// It also accepts an optional "filter" function which can be used to further limit the pods that are returned.
// (Usually this is podEvictor.Evictable().IsEvictable, in order to only list the evictable pods on a node, but can
// be used by strategies to extend it if there are further restrictions, such as with NodeAffinity).
func ListPodsOnANode(
ctx context.Context,
client clientset.Interface,
node *v1.Node,
opts ...func(opts *Options),
) ([]*v1.Pod, error) {
fieldSelectorString := "spec.nodeName=" + node.Name + ",status.phase!=" + string(v1.PodSucceeded) + ",status.phase!=" + string(v1.PodFailed)
return ListPodsOnANodeWithFieldSelector(ctx, client, node, fieldSelectorString, opts...)
}
// ListPodsOnANodeWithFieldSelector lists all of the pods on a node using the filter selectors
func ListPodsOnANodeWithFieldSelector(
ctx context.Context,
client clientset.Interface,
node *v1.Node,
fieldSelectorString string,
opts ...func(opts *Options),
) ([]*v1.Pod, error) {
options := &Options{}
for _, opt := range opts {
opt(options)
}
pods := make([]*v1.Pod, 0)
labelSelectorString := ""
if options.labelSelector != nil {
selector, err := metav1.LabelSelectorAsSelector(options.labelSelector)
if err != nil {
return []*v1.Pod{}, err
}
labelSelectorString = selector.String()
}
if len(options.includedNamespaces) > 0 {
fieldSelector, err := fields.ParseSelector(fieldSelectorString)
if err != nil {
return []*v1.Pod{}, err
}
for _, namespace := range options.includedNamespaces {
podList, err := client.CoreV1().Pods(namespace).List(ctx,
metav1.ListOptions{
FieldSelector: fieldSelector.String(),
LabelSelector: labelSelectorString,
})
if err != nil {
return []*v1.Pod{}, err
}
for i := range podList.Items {
if options.filter != nil && !options.filter(&podList.Items[i]) {
continue
}
pods = append(pods, &podList.Items[i])
}
}
return pods, nil
}
if len(options.excludedNamespaces) > 0 {
for _, namespace := range options.excludedNamespaces {
fieldSelectorString += ",metadata.namespace!=" + namespace
}
}
fieldSelector, err := fields.ParseSelector(fieldSelectorString)
if err != nil {
return []*v1.Pod{}, err
}
// INFO(jchaloup): field selectors do not work properly with listers
// Once the descheduler switches to pod listers (through informers),
// We need to flip to client-side filtering.
podList, err := client.CoreV1().Pods(v1.NamespaceAll).List(ctx,
metav1.ListOptions{
FieldSelector: fieldSelector.String(),
LabelSelector: labelSelectorString,
})
if err != nil {
return []*v1.Pod{}, err
}
for i := range podList.Items {
// fake client does not support field selectors
// so let's filter based on the node name as well (quite cheap)
if podList.Items[i].Spec.NodeName != node.Name {
continue
}
if options.filter != nil && !options.filter(&podList.Items[i]) {
continue
}
pods = append(pods, &podList.Items[i])
}
return pods, nil
}
func IsCriticalPod(pod *v1.Pod) bool {
return utils.IsCriticalPod(pod)
// OwnerRef returns the ownerRefList for the pod.
func OwnerRef(pod *v1.Pod) []metav1.OwnerReference {
return pod.ObjectMeta.GetOwnerReferences()
}
func IsBestEffortPod(pod *v1.Pod) bool {
@@ -90,37 +181,26 @@ func IsGuaranteedPod(pod *v1.Pod) bool {
return utils.GetPodQOS(pod) == v1.PodQOSGuaranteed
}
func IsDaemonsetPod(ownerRefList []metav1.OwnerReference) bool {
for _, ownerRef := range ownerRefList {
if ownerRef.Kind == "DaemonSet" {
// SortPodsBasedOnPriorityLowToHigh sorts pods based on their priorities from low to high.
// If pods have same priorities, they will be sorted by QoS in the following order:
// BestEffort, Burstable, Guaranteed
func SortPodsBasedOnPriorityLowToHigh(pods []*v1.Pod) {
sort.Slice(pods, func(i, j int) bool {
if pods[i].Spec.Priority == nil && pods[j].Spec.Priority != nil {
return true
}
}
return false
}
// IsMirrorPod checks whether the pod is a mirror pod.
func IsMirrorPod(pod *v1.Pod) bool {
return utils.IsMirrorPod(pod)
}
// HaveEvictAnnotation checks if the pod have evict annotation
func HaveEvictAnnotation(pod *v1.Pod) bool {
_, found := pod.ObjectMeta.Annotations[evictPodAnnotationKey]
return found
}
func IsPodWithLocalStorage(pod *v1.Pod) bool {
for _, volume := range pod.Spec.Volumes {
if volume.HostPath != nil || volume.EmptyDir != nil {
return true
if pods[j].Spec.Priority == nil && pods[i].Spec.Priority != nil {
return false
}
}
return false
}
// OwnerRef returns the ownerRefList for the pod.
func OwnerRef(pod *v1.Pod) []metav1.OwnerReference {
return pod.ObjectMeta.GetOwnerReferences()
if (pods[j].Spec.Priority == nil && pods[i].Spec.Priority == nil) || (*pods[i].Spec.Priority == *pods[j].Spec.Priority) {
if IsBestEffortPod(pods[i]) {
return true
}
if IsBurstablePod(pods[i]) && IsGuaranteedPod(pods[j]) {
return true
}
return false
}
return *pods[i].Spec.Priority < *pods[j].Spec.Priority
})
}

View File

@@ -17,221 +17,128 @@ limitations under the License.
package pod
import (
"context"
"fmt"
"reflect"
"strings"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"sigs.k8s.io/descheduler/pkg/utils"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/test"
)
func TestIsEvictable(t *testing.T) {
n1 := test.BuildTestNode("node1", 1000, 2000, 13, nil)
type testCase struct {
pod *v1.Pod
runBefore func(*v1.Pod)
evictLocalStoragePods bool
result bool
}
var (
lowPriority = int32(0)
highPriority = int32(10000)
)
testCases := []testCase{
func TestListPodsOnANode(t *testing.T) {
testCases := []struct {
name string
pods map[string][]v1.Pod
node *v1.Node
labelSelector *metav1.LabelSelector
expectedPodCount int
}{
{
pod: test.BuildTestPod("p1", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
name: "test listing pods on a node",
pods: map[string][]v1.Pod{
"n1": {
*test.BuildTestPod("pod1", 100, 0, "n1", nil),
*test.BuildTestPod("pod2", 100, 0, "n1", nil),
},
"n2": {*test.BuildTestPod("pod3", 100, 0, "n2", nil)},
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p2", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
node: test.BuildTestNode("n1", 2000, 3000, 10, nil),
labelSelector: nil,
expectedPodCount: 2,
},
{
name: "test listing pods with label selector",
pods: map[string][]v1.Pod{
"n1": {
*test.BuildTestPod("pod1", 100, 0, "n1", nil),
*test.BuildTestPod("pod2", 100, 0, "n1", func(pod *v1.Pod) {
pod.Labels = map[string]string{"foo": "bar"}
}),
*test.BuildTestPod("pod3", 100, 0, "n1", func(pod *v1.Pod) {
pod.Labels = map[string]string{"foo": "bar1"}
}),
},
"n2": {*test.BuildTestPod("pod4", 100, 0, "n2", nil)},
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p3", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p4", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p5", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
node: test.BuildTestNode("n1", 2000, 3000, 10, nil),
labelSelector: &metav1.LabelSelector{
MatchExpressions: []metav1.LabelSelectorRequirement{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
Key: "foo",
Operator: metav1.LabelSelectorOpIn,
Values: []string{"bar", "bar1"},
},
}
},
},
evictLocalStoragePods: false,
result: false,
}, {
pod: test.BuildTestPod("p6", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
},
evictLocalStoragePods: true,
result: true,
}, {
pod: test.BuildTestPod("p7", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p8", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
},
evictLocalStoragePods: false,
result: false,
}, {
pod: test.BuildTestPod("p9", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = map[string]string{"descheduler.alpha.kubernetes.io/evict": "true"}
pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p10", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = test.GetMirrorPodAnnotation()
},
evictLocalStoragePods: false,
result: false,
}, {
pod: test.BuildTestPod("p11", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
pod.Annotations = test.GetMirrorPodAnnotation()
pod.Annotations["descheduler.alpha.kubernetes.io/evict"] = "true"
},
evictLocalStoragePods: false,
result: true,
}, {
pod: test.BuildTestPod("p12", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
},
evictLocalStoragePods: false,
result: false,
}, {
pod: test.BuildTestPod("p13", 400, 0, n1.Name, nil),
runBefore: func(pod *v1.Pod) {
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
pod.Annotations = map[string]string{
"descheduler.alpha.kubernetes.io/evict": "true",
}
},
evictLocalStoragePods: false,
result: true,
expectedPodCount: 2,
},
}
for _, test := range testCases {
test.runBefore(test.pod)
result := IsEvictable(test.pod, test.evictLocalStoragePods)
if result != test.result {
t.Errorf("IsEvictable should return for pod %s %t, but it returns %t", test.pod.Name, test.result, result)
for _, testCase := range testCases {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldString := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldString, "n1") {
return true, &v1.PodList{Items: testCase.pods["n1"]}, nil
} else if strings.Contains(fieldString, "n2") {
return true, &v1.PodList{Items: testCase.pods["n2"]}, nil
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
pods, _ := ListPodsOnANode(context.TODO(), fakeClient, testCase.node, WithLabelSelector(testCase.labelSelector))
if len(pods) != testCase.expectedPodCount {
t.Errorf("expected %v pods on node %v, got %+v", testCase.expectedPodCount, testCase.node.Name, len(pods))
}
}
}
func TestPodTypes(t *testing.T) {
n1 := test.BuildTestNode("node1", 1000, 2000, 9, nil)
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, nil)
// These won't be evicted.
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, nil)
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, nil)
p4 := test.BuildTestPod("p4", 400, 0, n1.Name, nil)
p5 := test.BuildTestPod("p5", 400, 0, n1.Name, nil)
p6 := test.BuildTestPod("p6", 400, 0, n1.Name, nil)
func TestSortPodsBasedOnPriorityLowToHigh(t *testing.T) {
n1 := test.BuildTestNode("n1", 4000, 3000, 9, nil)
p6.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, func(pod *v1.Pod) {
test.SetPodPriority(pod, lowPriority)
})
p1.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
// The following 4 pods won't get evicted.
// A daemonset.
//p2.Annotations = test.GetDaemonSetAnnotation()
p2.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
// A pod with local storage.
p3.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p3.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
p4.Annotations = test.GetMirrorPodAnnotation()
// A Critical Pod.
p5.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
p5.Spec.Priority = &priority
systemCriticalPriority := utils.SystemCriticalPriority
p5.Spec.Priority = &systemCriticalPriority
if !IsMirrorPod(p4) {
t.Errorf("Expected p4 to be a mirror pod.")
}
if !IsCriticalPod(p5) {
t.Errorf("Expected p5 to be a critical pod.")
}
if !IsPodWithLocalStorage(p3) {
t.Errorf("Expected p3 to be a pod with local storage.")
}
ownerRefList := OwnerRef(p2)
if !IsDaemonsetPod(ownerRefList) {
t.Errorf("Expected p2 to be a daemonset pod.")
}
ownerRefList = OwnerRef(p1)
if IsDaemonsetPod(ownerRefList) || IsPodWithLocalStorage(p1) || IsCriticalPod(p1) || IsMirrorPod(p1) {
t.Errorf("Expected p1 to be a normal pod.")
}
// BestEffort
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
test.SetPodPriority(pod, highPriority)
test.MakeBestEffortPod(pod)
})
// Burstable
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) {
test.SetPodPriority(pod, highPriority)
test.MakeBurstablePod(pod)
})
// Guaranteed
p4 := test.BuildTestPod("p4", 400, 100, n1.Name, func(pod *v1.Pod) {
test.SetPodPriority(pod, highPriority)
test.MakeGuaranteedPod(pod)
})
// Best effort with nil priorities.
p5 := test.BuildTestPod("p5", 400, 100, n1.Name, test.MakeBestEffortPod)
p5.Spec.Priority = nil
p6 := test.BuildTestPod("p6", 400, 100, n1.Name, test.MakeGuaranteedPod)
p6.Spec.Priority = nil
podList := []*v1.Pod{p4, p3, p2, p1, p6, p5}
SortPodsBasedOnPriorityLowToHigh(podList)
if !reflect.DeepEqual(podList[len(podList)-1], p4) {
t.Errorf("Expected last pod in sorted list to be %v which of highest priority and guaranteed but got %v", p4, podList[len(podList)-1])
}
}

View File

@@ -21,7 +21,7 @@ import (
"io/ioutil"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/api/v1alpha1"
@@ -30,7 +30,7 @@ import (
func LoadPolicyConfig(policyConfigFile string) (*api.DeschedulerPolicy, error) {
if policyConfigFile == "" {
klog.V(1).Infof("policy config file not specified")
klog.V(1).InfoS("Policy config file not specified")
return nil, nil
}

View File

@@ -19,9 +19,22 @@ package scheme
import (
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/serializer"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/api/v1alpha1"
"sigs.k8s.io/descheduler/pkg/apis/componentconfig"
componentconfigv1alpha1 "sigs.k8s.io/descheduler/pkg/apis/componentconfig/v1alpha1"
)
var (
Scheme = runtime.NewScheme()
Codecs = serializer.NewCodecFactory(Scheme)
)
func init() {
utilruntime.Must(api.AddToScheme(Scheme))
utilruntime.Must(v1alpha1.AddToScheme(Scheme))
utilruntime.Must(componentconfig.AddToScheme(Scheme))
utilruntime.Must(componentconfigv1alpha1.AddToScheme(Scheme))
}

View File

@@ -18,6 +18,8 @@ package strategies
import (
"context"
"fmt"
"math"
"reflect"
"sort"
"strings"
@@ -26,13 +28,34 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func validateRemoveDuplicatePodsParams(params *api.StrategyParameters) error {
if params == nil {
return nil
}
// At most one of include/exclude can be set
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
type podOwner struct {
namespace, kind, name string
imagesHash string
}
// RemoveDuplicatePods removes the duplicate pods on node. This strategy evicts all duplicate pods on node.
// A pod is said to be a duplicate of other if both of them are from same creator, kind and are within the same
// namespace, and have at least one container with the same image.
@@ -42,81 +65,225 @@ func RemoveDuplicatePods(
client clientset.Interface,
strategy api.DeschedulerStrategy,
nodes []*v1.Node,
evictLocalStoragePods bool,
podEvictor *evictions.PodEvictor,
) {
if err := validateRemoveDuplicatePodsParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid RemoveDuplicatePods parameters")
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
var includedNamespaces, excludedNamespaces []string
if strategy.Params != nil && strategy.Params.Namespaces != nil {
includedNamespaces = strategy.Params.Namespaces.Include
excludedNamespaces = strategy.Params.Namespaces.Exclude
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
duplicatePods := make(map[podOwner]map[string][]*v1.Pod)
ownerKeyOccurence := make(map[podOwner]int32)
nodeCount := 0
nodeMap := make(map[string]*v1.Node)
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v", node.Name)
duplicatePods := listDuplicatePodsOnANode(ctx, client, node, strategy, evictLocalStoragePods)
for _, pod := range duplicatePods {
if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
break
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListPodsOnANode(ctx,
client,
node,
podutil.WithFilter(evictable.IsEvictable),
podutil.WithNamespaces(includedNamespaces),
podutil.WithoutNamespaces(excludedNamespaces),
)
if err != nil {
klog.ErrorS(err, "Error listing evictable pods on node", "node", klog.KObj(node))
continue
}
nodeMap[node.Name] = node
nodeCount++
// Each pod has a list of owners and a list of containers, and each container has 1 image spec.
// For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string.
// All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness.
// This list of keys representing a single pod is then sorted alphabetically.
// If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons:
// - The 2 pods have the exact same ownerrefs
// - The 2 pods have the exact same container images
//
// duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D-slice of all the other lists where that is the first key
// (Since we sort each pod's list, we only need to key the map on the first entry in each list. Any pod that doesn't have
// the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed).
// If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate.
// If not, then we add this pod's list to the list of lists for that key.
duplicateKeysMap := map[string][][]string{}
for _, pod := range pods {
ownerRefList := podutil.OwnerRef(pod)
if hasExcludedOwnerRefKind(ownerRefList, strategy) || len(ownerRefList) == 0 {
continue
}
podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
imageList := []string{}
for _, container := range pod.Spec.Containers {
imageList = append(imageList, container.Image)
}
sort.Strings(imageList)
imagesHash := strings.Join(imageList, "#")
for _, ownerRef := range ownerRefList {
ownerKey := podOwner{
namespace: pod.ObjectMeta.Namespace,
kind: ownerRef.Kind,
name: ownerRef.Name,
imagesHash: imagesHash,
}
ownerKeyOccurence[ownerKey] = ownerKeyOccurence[ownerKey] + 1
for _, image := range imageList {
// Namespace/Kind/Name should be unique for the cluster.
// We also consider the image, as 2 pods could have the same owner but serve different purposes
// So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod.
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, image}, "/")
podContainerKeys = append(podContainerKeys, s)
}
}
sort.Strings(podContainerKeys)
// If there have been any other pods with the same first "key", look through all the lists to see if any match
if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok {
matched := false
for _, keys := range existing {
if reflect.DeepEqual(keys, podContainerKeys) {
matched = true
klog.V(3).InfoS("Duplicate found", "pod", klog.KObj(pod))
for _, ownerRef := range ownerRefList {
ownerKey := podOwner{
namespace: pod.ObjectMeta.Namespace,
kind: ownerRef.Kind,
name: ownerRef.Name,
imagesHash: imagesHash,
}
if _, ok := duplicatePods[ownerKey]; !ok {
duplicatePods[ownerKey] = make(map[string][]*v1.Pod)
}
duplicatePods[ownerKey][node.Name] = append(duplicatePods[ownerKey][node.Name], pod)
}
break
}
}
if !matched {
// Found no matches, add this list of keys to the list of lists that have the same first key
duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys)
}
} else {
// This is the first pod we've seen that has this first "key" entry
duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys}
}
}
}
// 1. how many pods can be evicted to respect uniform placement of pods among viable nodes?
for ownerKey, podNodes := range duplicatePods {
targetNodes := getTargetNodes(podNodes, nodes)
klog.V(2).InfoS("Adjusting feasible nodes", "owner", ownerKey, "from", nodeCount, "to", len(targetNodes))
if len(targetNodes) < 2 {
klog.V(1).InfoS("Less than two feasible nodes for duplicates to land, skipping eviction", "owner", ownerKey)
continue
}
upperAvg := int(math.Ceil(float64(ownerKeyOccurence[ownerKey]) / float64(len(targetNodes))))
for nodeName, pods := range podNodes {
klog.V(2).InfoS("Average occurrence per node", "node", klog.KObj(nodeMap[nodeName]), "ownerKey", ownerKey, "avg", upperAvg)
// list of duplicated pods does not contain the original referential pod
if len(pods)+1 > upperAvg {
// It's assumed all duplicated pods are in the same priority class
// TODO(jchaloup): check if the pod has a different node to lend to
for _, pod := range pods[upperAvg-1:] {
if _, err := podEvictor.EvictPod(ctx, pod, nodeMap[nodeName], "RemoveDuplicatePods"); err != nil {
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
}
}
}
}
}
}
// listDuplicatePodsOnANode lists duplicate pods on a given node.
// It checks for pods which have the same owner and have at least 1 container with the same image spec
func listDuplicatePodsOnANode(ctx context.Context, client clientset.Interface, node *v1.Node, strategy api.DeschedulerStrategy, evictLocalStoragePods bool) []*v1.Pod {
pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods)
if err != nil {
func getNodeAffinityNodeSelector(pod *v1.Pod) *v1.NodeSelector {
if pod.Spec.Affinity == nil {
return nil
}
if pod.Spec.Affinity.NodeAffinity == nil {
return nil
}
return pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
}
duplicatePods := make([]*v1.Pod, 0, len(pods))
// Each pod has a list of owners and a list of containers, and each container has 1 image spec.
// For each pod, we go through all the OwnerRef/Image mappings and represent them as a "key" string.
// All of those mappings together makes a list of "key" strings that essentially represent that pod's uniqueness.
// This list of keys representing a single pod is then sorted alphabetically.
// If any other pod has a list that matches that pod's list, those pods are undeniably duplicates for the following reasons:
// - The 2 pods have the exact same ownerrefs
// - The 2 pods have the exact same container images
//
// duplicateKeysMap maps the first Namespace/Kind/Name/Image in a pod's list to a 2D-slice of all the other lists where that is the first key
// (Since we sort each pod's list, we only need to key the map on the first entry in each list. Any pod that doesn't have
// the same first entry is clearly not a duplicate. This makes lookup quick and minimizes storage needed).
// If any of the existing lists for that first key matches the current pod's list, the current pod is a duplicate.
// If not, then we add this pod's list to the list of lists for that key.
duplicateKeysMap := map[string][][]string{}
for _, pod := range pods {
ownerRefList := podutil.OwnerRef(pod)
if hasExcludedOwnerRefKind(ownerRefList, strategy) {
continue
}
podContainerKeys := make([]string, 0, len(ownerRefList)*len(pod.Spec.Containers))
for _, ownerRef := range ownerRefList {
for _, container := range pod.Spec.Containers {
// Namespace/Kind/Name should be unique for the cluster.
// We also consider the image, as 2 pods could have the same owner but serve different purposes
// So any non-unique Namespace/Kind/Name/Image pattern is a duplicate pod.
s := strings.Join([]string{pod.ObjectMeta.Namespace, ownerRef.Kind, ownerRef.Name, container.Image}, "/")
podContainerKeys = append(podContainerKeys, s)
}
}
sort.Strings(podContainerKeys)
func getTargetNodes(podNodes map[string][]*v1.Pod, nodes []*v1.Node) []*v1.Node {
// In order to reduce the number of pods processed, identify pods which have
// equal (tolerations, nodeselectors, node affinity) terms and considered them
// as identical. Identical pods wrt. (tolerations, nodeselectors, node affinity) terms
// will produce the same result when checking if a pod is feasible for a node.
// Thus, improving efficiency of processing pods marked for eviction.
// If there have been any other pods with the same first "key", look through all the lists to see if any match
if existing, ok := duplicateKeysMap[podContainerKeys[0]]; ok {
for _, keys := range existing {
if reflect.DeepEqual(keys, podContainerKeys) {
duplicatePods = append(duplicatePods, pod)
break
// Collect all distinct pods which differ in at least taints, node affinity or node selector terms
distinctPods := map[*v1.Pod]struct{}{}
for _, pods := range podNodes {
for _, pod := range pods {
duplicated := false
for dp := range distinctPods {
if utils.TolerationsEqual(pod.Spec.Tolerations, dp.Spec.Tolerations) &&
utils.NodeSelectorsEqual(getNodeAffinityNodeSelector(pod), getNodeAffinityNodeSelector(dp)) &&
reflect.DeepEqual(pod.Spec.NodeSelector, dp.Spec.NodeSelector) {
duplicated = true
continue
}
// Found no matches, add this list of keys to the list of lists that have the same first key
duplicateKeysMap[podContainerKeys[0]] = append(duplicateKeysMap[podContainerKeys[0]], podContainerKeys)
}
} else {
// This is the first pod we've seen that has this first "key" entry
duplicateKeysMap[podContainerKeys[0]] = [][]string{podContainerKeys}
if duplicated {
continue
}
distinctPods[pod] = struct{}{}
}
}
return duplicatePods
// For each distinct pod get a list of nodes where it can land
targetNodesMap := map[string]*v1.Node{}
for pod := range distinctPods {
matchingNodes := map[string]*v1.Node{}
for _, node := range nodes {
if !utils.TolerationsTolerateTaintsWithFilter(pod.Spec.Tolerations, node.Spec.Taints, func(taint *v1.Taint) bool {
return taint.Effect == v1.TaintEffectNoSchedule || taint.Effect == v1.TaintEffectNoExecute
}) {
continue
}
if match, err := utils.PodMatchNodeSelector(pod, node); err == nil && !match {
continue
}
matchingNodes[node.Name] = node
}
if len(matchingNodes) > 1 {
for nodeName := range matchingNodes {
targetNodesMap[nodeName] = matchingNodes[nodeName]
}
}
}
targetNodes := []*v1.Node{}
for _, node := range targetNodesMap {
targetNodes = append(targetNodes, node)
}
return targetNodes
}
func hasExcludedOwnerRefKind(ownerRefs []metav1.OwnerReference, strategy api.DeschedulerStrategy) bool {
if strategy.Params.RemoveDuplicates == nil {
if strategy.Params == nil || strategy.Params.RemoveDuplicates == nil {
return false
}
exclude := sets.NewString(strategy.Params.RemoveDuplicates.ExcludeOwnerKinds...)

View File

@@ -20,8 +20,10 @@ import (
"context"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
@@ -31,35 +33,73 @@ import (
"sigs.k8s.io/descheduler/test"
)
func buildTestPodWithImage(podName, node, image string) *v1.Pod {
pod := test.BuildTestPod(podName, 100, 0, node, test.SetRSOwnerRef)
pod.Spec.Containers = append(pod.Spec.Containers, v1.Container{
Name: image,
Image: image,
})
return pod
}
func TestFindDuplicatePods(t *testing.T) {
ctx := context.Background()
// first setup pods
node := test.BuildTestNode("n1", 2000, 3000, 10, nil)
p1 := test.BuildTestPod("p1", 100, 0, node.Name, nil)
node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Key: "hardware",
Value: "gpu",
Effect: v1.TaintEffectNoSchedule,
},
}
})
node4 := test.BuildTestNode("n4", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"datacenter": "east",
}
})
node5 := test.BuildTestNode("n5", 2000, 3000, 10, func(node *v1.Node) {
node.Spec = v1.NodeSpec{
Unschedulable: true,
}
})
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p1.Namespace = "dev"
p2 := test.BuildTestPod("p2", 100, 0, node.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
p2.Namespace = "dev"
p3 := test.BuildTestPod("p3", 100, 0, node.Name, nil)
p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
p3.Namespace = "dev"
p4 := test.BuildTestPod("p4", 100, 0, node.Name, nil)
p5 := test.BuildTestPod("p5", 100, 0, node.Name, nil)
p6 := test.BuildTestPod("p6", 100, 0, node.Name, nil)
p7 := test.BuildTestPod("p7", 100, 0, node.Name, nil)
p4 := test.BuildTestPod("p4", 100, 0, node1.Name, nil)
p5 := test.BuildTestPod("p5", 100, 0, node1.Name, nil)
p6 := test.BuildTestPod("p6", 100, 0, node1.Name, nil)
p7 := test.BuildTestPod("p7", 100, 0, node1.Name, nil)
p7.Namespace = "kube-system"
p8 := test.BuildTestPod("p8", 100, 0, node.Name, nil)
p8 := test.BuildTestPod("p8", 100, 0, node1.Name, nil)
p8.Namespace = "test"
p9 := test.BuildTestPod("p9", 100, 0, node.Name, nil)
p9 := test.BuildTestPod("p9", 100, 0, node1.Name, nil)
p9.Namespace = "test"
p10 := test.BuildTestPod("p10", 100, 0, node.Name, nil)
p10 := test.BuildTestPod("p10", 100, 0, node1.Name, nil)
p10.Namespace = "test"
p11 := test.BuildTestPod("p11", 100, 0, node.Name, nil)
p11 := test.BuildTestPod("p11", 100, 0, node1.Name, nil)
p11.Namespace = "different-images"
p12 := test.BuildTestPod("p12", 100, 0, node.Name, nil)
p12 := test.BuildTestPod("p12", 100, 0, node1.Name, nil)
p12.Namespace = "different-images"
p13 := test.BuildTestPod("p13", 100, 0, node.Name, nil)
p13 := test.BuildTestPod("p13", 100, 0, node1.Name, nil)
p13.Namespace = "different-images"
p14 := test.BuildTestPod("p14", 100, 0, node.Name, nil)
p14 := test.BuildTestPod("p14", 100, 0, node1.Name, nil)
p14.Namespace = "different-images"
p15 := test.BuildTestPod("p15", 100, 0, node1.Name, nil)
p15.Namespace = "node-fit"
p16 := test.BuildTestPod("NOT1", 100, 0, node1.Name, nil)
p16.Namespace = "node-fit"
p17 := test.BuildTestPod("NOT2", 100, 0, node1.Name, nil)
p17.Namespace = "node-fit"
p18 := test.BuildTestPod("TARGET", 100, 0, node1.Name, nil)
p18.Namespace = "node-fit"
// ### Evictable Pods ###
@@ -113,99 +153,560 @@ func TestFindDuplicatePods(t *testing.T) {
Image: "foo",
})
// ### Pods Evictable Based On Node Fit ###
ownerRef3 := test.GetReplicaSetOwnerRefList()
p15.ObjectMeta.OwnerReferences = ownerRef3
p16.ObjectMeta.OwnerReferences = ownerRef3
p17.ObjectMeta.OwnerReferences = ownerRef3
p18.ObjectMeta.OwnerReferences = ownerRef3
p15.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
p16.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
p17.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
testCases := []struct {
description string
maxPodsToEvict int
maxPodsToEvictPerNode int
pods []v1.Pod
nodes []*v1.Node
expectedEvictedPodCount int
strategy api.DeschedulerStrategy
}{
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 2 should be evicted.",
maxPodsToEvict: 5,
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. 1 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 1,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet, but ReplicaSet kind is excluded. 0 should be evicted.",
maxPodsToEvict: 5,
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: api.StrategyParameters{RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}}}},
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{RemoveDuplicates: &api.RemoveDuplicates{ExcludeOwnerKinds: []string{"ReplicaSet"}}}},
},
{
description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 2 should be evicted.",
maxPodsToEvict: 5,
description: "Three Pods in the `test` Namespace, bound to same ReplicaSet. 1 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p8, *p9, *p10},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 1,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three Pods in the `dev` Namespace, three Pods in the `test` Namespace. Bound to ReplicaSet with same name. 4 should be evicted.",
maxPodsToEvict: 5,
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3, *p8, *p9, *p10},
expectedEvictedPodCount: 4,
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 2,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods are: part of DaemonSet, with local storage, mirror pod annotation, critical pod annotation - none should be evicted.",
maxPodsToEvict: 2,
maxPodsToEvictPerNode: 2,
pods: []v1.Pod{*p4, *p5, *p6, *p7},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Test all Pods: 4 should be evicted.",
maxPodsToEvict: 5,
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3, *p4, *p5, *p6, *p7, *p8, *p9, *p10},
expectedEvictedPodCount: 4,
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 2,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with the same owner but different images should not be evicted",
maxPodsToEvict: 5,
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p11, *p12},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with multiple containers should not match themselves",
maxPodsToEvict: 5,
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p13},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Pods with matching ownerrefs and at not all matching image should not trigger an eviction",
maxPodsToEvict: 5,
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p11, *p13},
nodes: []*v1.Node{node1, node2},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{},
},
{
description: "Three pods in the `dev` Namespace, bound to same ReplicaSet. Only node available has a taint, and nodeFit set to true. 0 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3},
nodes: []*v1.Node{node1, node3},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
{
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet, all with a nodeSelector. Only node available has an incorrect node label, and nodeFit set to true. 0 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p15, *p16, *p17},
nodes: []*v1.Node{node1, node4},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
{
description: "Three pods in the `node-fit` Namespace, bound to same ReplicaSet. Only node available is not schedulable, and nodeFit set to true. 0 should be evicted.",
maxPodsToEvictPerNode: 5,
pods: []v1.Pod{*p1, *p2, *p3},
nodes: []*v1.Node{node1, node5},
expectedEvictedPodCount: 0,
strategy: api.DeschedulerStrategy{Params: &api.StrategyParameters{NodeFit: true}},
},
}
for _, testCase := range testCases {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: testCase.pods}, nil
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
return true, node, nil
})
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
testCase.maxPodsToEvict,
[]*v1.Node{node},
)
t.Run(testCase.description, func(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: testCase.pods}, nil
})
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
testCase.maxPodsToEvictPerNode,
testCase.nodes,
false,
false,
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, []*v1.Node{node}, false, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
}
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
}
})
}
}
func TestRemoveDuplicatesUniformly(t *testing.T) {
ctx := context.Background()
setRSOwnerRef2 := func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-2"},
}
}
setTwoRSOwnerRef := func(pod *v1.Pod) {
pod.ObjectMeta.OwnerReferences = []metav1.OwnerReference{
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-1"},
{Kind: "ReplicaSet", APIVersion: "v1", Name: "replicaset-2"},
}
}
setTolerationsK1 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
pod.Spec.Tolerations = []v1.Toleration{
{
Key: "k1",
Value: "v1",
Operator: v1.TolerationOpEqual,
Effect: v1.TaintEffectNoSchedule,
},
}
}
setTolerationsK2 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
pod.Spec.Tolerations = []v1.Toleration{
{
Key: "k2",
Value: "v2",
Operator: v1.TolerationOpEqual,
Effect: v1.TaintEffectNoSchedule,
},
}
}
setMasterNoScheduleTaint := func(node *v1.Node) {
node.Spec.Taints = []v1.Taint{
{
Effect: v1.TaintEffectNoSchedule,
Key: "node-role.kubernetes.io/master",
},
}
}
setMasterNoScheduleLabel := func(node *v1.Node) {
if node.ObjectMeta.Labels == nil {
node.ObjectMeta.Labels = map[string]string{}
}
node.ObjectMeta.Labels["node-role.kubernetes.io/master"] = ""
}
setWorkerLabel := func(node *v1.Node) {
if node.ObjectMeta.Labels == nil {
node.ObjectMeta.Labels = map[string]string{}
}
node.ObjectMeta.Labels["node-role.kubernetes.io/worker"] = "k1"
node.ObjectMeta.Labels["node-role.kubernetes.io/worker"] = "k2"
}
setNotMasterNodeSelectorK1 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
pod.Spec.Affinity = &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "node-role.kubernetes.io/master",
Operator: v1.NodeSelectorOpDoesNotExist,
},
{
Key: "k1",
Operator: v1.NodeSelectorOpDoesNotExist,
},
},
},
},
},
},
}
}
setNotMasterNodeSelectorK2 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
pod.Spec.Affinity = &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: "node-role.kubernetes.io/master",
Operator: v1.NodeSelectorOpDoesNotExist,
},
{
Key: "k2",
Operator: v1.NodeSelectorOpDoesNotExist,
},
},
},
},
},
},
}
}
setWorkerLabelSelectorK1 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
if pod.Spec.NodeSelector == nil {
pod.Spec.NodeSelector = map[string]string{}
}
pod.Spec.NodeSelector["node-role.kubernetes.io/worker"] = "k1"
}
setWorkerLabelSelectorK2 := func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
if pod.Spec.NodeSelector == nil {
pod.Spec.NodeSelector = map[string]string{}
}
pod.Spec.NodeSelector["node-role.kubernetes.io/worker"] = "k2"
}
testCases := []struct {
description string
maxPodsToEvictPerNode int
pods []v1.Pod
nodes []*v1.Node
expectedEvictedPodCount int
strategy api.DeschedulerStrategy
}{
{
description: "Evict pods uniformly",
pods: []v1.Pod{
// (5,3,1) -> (3,3,3) -> 2 evictions
*test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p3", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p4", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p5", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p6", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p7", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p8", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p9", 100, 0, "n3", test.SetRSOwnerRef),
},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly with one node left out",
pods: []v1.Pod{
// (5,3,1) -> (4,4,1) -> 1 eviction
*test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p3", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p4", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p5", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p6", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p7", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p8", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p9", 100, 0, "n3", test.SetRSOwnerRef),
},
expectedEvictedPodCount: 1,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly with two replica sets",
pods: []v1.Pod{
// (5,3,1) -> (3,3,3) -> 2 evictions
*test.BuildTestPod("p11", 100, 0, "n1", setTwoRSOwnerRef),
*test.BuildTestPod("p12", 100, 0, "n1", setTwoRSOwnerRef),
*test.BuildTestPod("p13", 100, 0, "n1", setTwoRSOwnerRef),
*test.BuildTestPod("p14", 100, 0, "n1", setTwoRSOwnerRef),
*test.BuildTestPod("p15", 100, 0, "n1", setTwoRSOwnerRef),
*test.BuildTestPod("p16", 100, 0, "n2", setTwoRSOwnerRef),
*test.BuildTestPod("p17", 100, 0, "n2", setTwoRSOwnerRef),
*test.BuildTestPod("p18", 100, 0, "n2", setTwoRSOwnerRef),
*test.BuildTestPod("p19", 100, 0, "n3", setTwoRSOwnerRef),
},
expectedEvictedPodCount: 4,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly with two owner references",
pods: []v1.Pod{
// (5,3,1) -> (3,3,3) -> 2 evictions
*test.BuildTestPod("p11", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p12", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p13", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p14", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p15", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p16", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p17", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p18", 100, 0, "n2", test.SetRSOwnerRef),
*test.BuildTestPod("p19", 100, 0, "n3", test.SetRSOwnerRef),
// (1,3,5) -> (3,3,3) -> 2 evictions
*test.BuildTestPod("p21", 100, 0, "n1", setRSOwnerRef2),
*test.BuildTestPod("p22", 100, 0, "n2", setRSOwnerRef2),
*test.BuildTestPod("p23", 100, 0, "n2", setRSOwnerRef2),
*test.BuildTestPod("p24", 100, 0, "n2", setRSOwnerRef2),
*test.BuildTestPod("p25", 100, 0, "n3", setRSOwnerRef2),
*test.BuildTestPod("p26", 100, 0, "n3", setRSOwnerRef2),
*test.BuildTestPod("p27", 100, 0, "n3", setRSOwnerRef2),
*test.BuildTestPod("p28", 100, 0, "n3", setRSOwnerRef2),
*test.BuildTestPod("p29", 100, 0, "n3", setRSOwnerRef2),
},
expectedEvictedPodCount: 4,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods with number of pods less than nodes",
pods: []v1.Pod{
// (2,0,0) -> (1,1,0) -> 1 eviction
*test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
*test.BuildTestPod("p2", 100, 0, "n1", test.SetRSOwnerRef),
},
expectedEvictedPodCount: 1,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods with number of pods less than nodes, but ignore different pods with the same ownerref",
pods: []v1.Pod{
// (1, 0, 0) for "bar","baz" images -> no eviction, even with a matching ownerKey
// (2, 0, 0) for "foo" image -> (1,1,0) - 1 eviction
// In this case the only "real" duplicates are p1 and p4, so one of those should be evicted
*buildTestPodWithImage("p1", "n1", "foo"),
*buildTestPodWithImage("p2", "n1", "bar"),
*buildTestPodWithImage("p3", "n1", "baz"),
*buildTestPodWithImage("p4", "n1", "foo"),
},
expectedEvictedPodCount: 1,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods with a single pod with three nodes",
pods: []v1.Pod{
// (2,0,0) -> (1,1,0) -> 1 eviction
*test.BuildTestPod("p1", 100, 0, "n1", test.SetRSOwnerRef),
},
expectedEvictedPodCount: 0,
nodes: []*v1.Node{
test.BuildTestNode("n1", 2000, 3000, 10, nil),
test.BuildTestNode("n2", 2000, 3000, 10, nil),
test.BuildTestNode("n3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly respecting taints",
pods: []v1.Pod{
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
*test.BuildTestPod("p1", 100, 0, "worker1", setTolerationsK1),
*test.BuildTestPod("p2", 100, 0, "worker1", setTolerationsK2),
*test.BuildTestPod("p3", 100, 0, "worker1", setTolerationsK1),
*test.BuildTestPod("p4", 100, 0, "worker1", setTolerationsK2),
*test.BuildTestPod("p5", 100, 0, "worker1", setTolerationsK1),
*test.BuildTestPod("p6", 100, 0, "worker2", setTolerationsK2),
*test.BuildTestPod("p7", 100, 0, "worker2", setTolerationsK1),
*test.BuildTestPod("p8", 100, 0, "worker2", setTolerationsK2),
*test.BuildTestPod("p9", 100, 0, "worker3", setTolerationsK1),
},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
test.BuildTestNode("master1", 2000, 3000, 10, setMasterNoScheduleTaint),
test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleTaint),
test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleTaint),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly respecting RequiredDuringSchedulingIgnoredDuringExecution node affinity",
pods: []v1.Pod{
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
*test.BuildTestPod("p1", 100, 0, "worker1", setNotMasterNodeSelectorK1),
*test.BuildTestPod("p2", 100, 0, "worker1", setNotMasterNodeSelectorK2),
*test.BuildTestPod("p3", 100, 0, "worker1", setNotMasterNodeSelectorK1),
*test.BuildTestPod("p4", 100, 0, "worker1", setNotMasterNodeSelectorK2),
*test.BuildTestPod("p5", 100, 0, "worker1", setNotMasterNodeSelectorK1),
*test.BuildTestPod("p6", 100, 0, "worker2", setNotMasterNodeSelectorK2),
*test.BuildTestPod("p7", 100, 0, "worker2", setNotMasterNodeSelectorK1),
*test.BuildTestPod("p8", 100, 0, "worker2", setNotMasterNodeSelectorK2),
*test.BuildTestPod("p9", 100, 0, "worker3", setNotMasterNodeSelectorK1),
},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
test.BuildTestNode("master1", 2000, 3000, 10, setMasterNoScheduleLabel),
test.BuildTestNode("master2", 2000, 3000, 10, setMasterNoScheduleLabel),
test.BuildTestNode("master3", 2000, 3000, 10, setMasterNoScheduleLabel),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly respecting node selector",
pods: []v1.Pod{
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
*test.BuildTestPod("p1", 100, 0, "worker1", setWorkerLabelSelectorK1),
*test.BuildTestPod("p2", 100, 0, "worker1", setWorkerLabelSelectorK2),
*test.BuildTestPod("p3", 100, 0, "worker1", setWorkerLabelSelectorK1),
*test.BuildTestPod("p4", 100, 0, "worker1", setWorkerLabelSelectorK2),
*test.BuildTestPod("p5", 100, 0, "worker1", setWorkerLabelSelectorK1),
*test.BuildTestPod("p6", 100, 0, "worker2", setWorkerLabelSelectorK2),
*test.BuildTestPod("p7", 100, 0, "worker2", setWorkerLabelSelectorK1),
*test.BuildTestPod("p8", 100, 0, "worker2", setWorkerLabelSelectorK2),
*test.BuildTestPod("p9", 100, 0, "worker3", setWorkerLabelSelectorK1),
},
expectedEvictedPodCount: 2,
nodes: []*v1.Node{
test.BuildTestNode("worker1", 2000, 3000, 10, setWorkerLabel),
test.BuildTestNode("worker2", 2000, 3000, 10, setWorkerLabel),
test.BuildTestNode("worker3", 2000, 3000, 10, setWorkerLabel),
test.BuildTestNode("master1", 2000, 3000, 10, nil),
test.BuildTestNode("master2", 2000, 3000, 10, nil),
test.BuildTestNode("master3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
{
description: "Evict pods uniformly respecting node selector with zero target nodes",
pods: []v1.Pod{
// (5,3,1,0,0,0) -> (3,3,3,0,0,0) -> 2 evictions
*test.BuildTestPod("p1", 100, 0, "worker1", setWorkerLabelSelectorK1),
*test.BuildTestPod("p2", 100, 0, "worker1", setWorkerLabelSelectorK2),
*test.BuildTestPod("p3", 100, 0, "worker1", setWorkerLabelSelectorK1),
*test.BuildTestPod("p4", 100, 0, "worker1", setWorkerLabelSelectorK2),
*test.BuildTestPod("p5", 100, 0, "worker1", setWorkerLabelSelectorK1),
*test.BuildTestPod("p6", 100, 0, "worker2", setWorkerLabelSelectorK2),
*test.BuildTestPod("p7", 100, 0, "worker2", setWorkerLabelSelectorK1),
*test.BuildTestPod("p8", 100, 0, "worker2", setWorkerLabelSelectorK2),
*test.BuildTestPod("p9", 100, 0, "worker3", setWorkerLabelSelectorK1),
},
expectedEvictedPodCount: 0,
nodes: []*v1.Node{
test.BuildTestNode("worker1", 2000, 3000, 10, nil),
test.BuildTestNode("worker2", 2000, 3000, 10, nil),
test.BuildTestNode("worker3", 2000, 3000, 10, nil),
test.BuildTestNode("master1", 2000, 3000, 10, nil),
test.BuildTestNode("master2", 2000, 3000, 10, nil),
test.BuildTestNode("master3", 2000, 3000, 10, nil),
},
strategy: api.DeschedulerStrategy{},
},
}
for _, testCase := range testCases {
t.Run(testCase.description, func(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: testCase.pods}, nil
})
podEvictor := evictions.NewPodEvictor(
fakeClient,
policyv1.SchemeGroupVersion.String(),
false,
testCase.maxPodsToEvictPerNode,
testCase.nodes,
false,
false,
false,
)
RemoveDuplicatePods(ctx, fakeClient, testCase.strategy, testCase.nodes, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if podsEvicted != testCase.expectedEvictedPodCount {
t.Errorf("Test error for description: %s. Expected evicted pods count %v, got %v", testCase.description, testCase.expectedEvictedPodCount, podsEvicted)
}
})
}
}

View File

@@ -0,0 +1,171 @@
package strategies
import (
"context"
"fmt"
"k8s.io/apimachinery/pkg/util/sets"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
utilerrors "k8s.io/apimachinery/pkg/util/errors"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/descheduler/strategies/validation"
)
// validatedFailedPodsStrategyParams contains validated strategy parameters
type validatedFailedPodsStrategyParams struct {
validation.ValidatedStrategyParams
includingInitContainers bool
reasons sets.String
excludeOwnerKinds sets.String
minPodLifetimeSeconds *uint
}
// RemoveFailedPods removes Pods that are in failed status phase.
func RemoveFailedPods(
ctx context.Context,
client clientset.Interface,
strategy api.DeschedulerStrategy,
nodes []*v1.Node,
podEvictor *evictions.PodEvictor,
) {
strategyParams, err := validateAndParseRemoveFailedPodsParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Invalid RemoveFailedPods parameters")
return
}
evictable := podEvictor.Evictable(
evictions.WithPriorityThreshold(strategyParams.ThresholdPriority),
evictions.WithNodeFit(strategyParams.NodeFit),
evictions.WithLabelSelector(strategyParams.LabelSelector),
)
var labelSelector *metav1.LabelSelector
if strategy.Params != nil {
labelSelector = strategy.Params.LabelSelector
}
for _, node := range nodes {
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
fieldSelectorString := "spec.nodeName=" + node.Name + ",status.phase=" + string(v1.PodFailed)
pods, err := podutil.ListPodsOnANodeWithFieldSelector(
ctx,
client,
node,
fieldSelectorString,
podutil.WithFilter(evictable.IsEvictable),
podutil.WithNamespaces(strategyParams.IncludedNamespaces.UnsortedList()),
podutil.WithoutNamespaces(strategyParams.ExcludedNamespaces.UnsortedList()),
podutil.WithLabelSelector(labelSelector),
)
if err != nil {
klog.ErrorS(err, "Error listing a nodes failed pods", "node", klog.KObj(node))
continue
}
for i, pod := range pods {
if err = validateFailedPodShouldEvict(pod, *strategyParams); err != nil {
klog.V(4).InfoS(fmt.Sprintf("ignoring pod for eviction due to: %s", err.Error()), "pod", klog.KObj(pod))
continue
}
if _, err = podEvictor.EvictPod(ctx, pods[i], node, "FailedPod"); err != nil {
klog.ErrorS(err, "Error evicting pod", "pod", klog.KObj(pod))
break
}
}
}
}
func validateAndParseRemoveFailedPodsParams(
ctx context.Context,
client clientset.Interface,
params *api.StrategyParameters,
) (*validatedFailedPodsStrategyParams, error) {
if params == nil {
return &validatedFailedPodsStrategyParams{
ValidatedStrategyParams: validation.DefaultValidatedStrategyParams(),
}, nil
}
strategyParams, err := validation.ValidateAndParseStrategyParams(ctx, client, params)
if err != nil {
return nil, err
}
var reasons, excludeOwnerKinds sets.String
var includingInitContainers bool
var minPodLifetimeSeconds *uint
if params.FailedPods != nil {
reasons = sets.NewString(params.FailedPods.Reasons...)
includingInitContainers = params.FailedPods.IncludingInitContainers
excludeOwnerKinds = sets.NewString(params.FailedPods.ExcludeOwnerKinds...)
minPodLifetimeSeconds = params.FailedPods.MinPodLifetimeSeconds
}
return &validatedFailedPodsStrategyParams{
ValidatedStrategyParams: *strategyParams,
includingInitContainers: includingInitContainers,
reasons: reasons,
excludeOwnerKinds: excludeOwnerKinds,
minPodLifetimeSeconds: minPodLifetimeSeconds,
}, nil
}
// validateFailedPodShouldEvict looks at strategy params settings to see if the Pod
// should be evicted given the params in the PodFailed policy.
func validateFailedPodShouldEvict(pod *v1.Pod, strategyParams validatedFailedPodsStrategyParams) error {
var errs []error
if strategyParams.minPodLifetimeSeconds != nil {
podAgeSeconds := uint(metav1.Now().Sub(pod.GetCreationTimestamp().Local()).Seconds())
if podAgeSeconds < *strategyParams.minPodLifetimeSeconds {
errs = append(errs, fmt.Errorf("pod does not exceed the min age seconds of %d", *strategyParams.minPodLifetimeSeconds))
}
}
if len(strategyParams.excludeOwnerKinds) > 0 {
ownerRefList := podutil.OwnerRef(pod)
for _, owner := range ownerRefList {
if strategyParams.excludeOwnerKinds.Has(owner.Kind) {
errs = append(errs, fmt.Errorf("pod's owner kind of %s is excluded", owner.Kind))
}
}
}
if len(strategyParams.reasons) > 0 {
reasons := getFailedContainerStatusReasons(pod.Status.ContainerStatuses)
if strategyParams.includingInitContainers {
reasons = append(reasons, getFailedContainerStatusReasons(pod.Status.InitContainerStatuses)...)
}
if !strategyParams.reasons.HasAny(reasons...) {
errs = append(errs, fmt.Errorf("pod does not match any of the reasons"))
}
}
return utilerrors.NewAggregate(errs)
}
func getFailedContainerStatusReasons(containerStatuses []v1.ContainerStatus) []string {
reasons := make([]string, 0)
for _, containerStatus := range containerStatuses {
if containerStatus.State.Waiting != nil && containerStatus.State.Waiting.Reason != "" {
reasons = append(reasons, containerStatus.State.Waiting.Reason)
}
if containerStatus.State.Terminated != nil && containerStatus.State.Terminated.Reason != "" {
reasons = append(reasons, containerStatus.State.Terminated.Reason)
}
}
return reasons
}

View File

@@ -0,0 +1,282 @@
package strategies
import (
"context"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"testing"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/test"
)
var (
OneHourInSeconds uint = 3600
)
func TestRemoveFailedPods(t *testing.T) {
ctx := context.Background()
createStrategy := func(enabled, includingInitContainers bool, reasons, excludeKinds []string, minAgeSeconds *uint, nodeFit bool) api.DeschedulerStrategy {
return api.DeschedulerStrategy{
Enabled: enabled,
Params: &api.StrategyParameters{
FailedPods: &api.FailedPods{
Reasons: reasons,
IncludingInitContainers: includingInitContainers,
ExcludeOwnerKinds: excludeKinds,
MinPodLifetimeSeconds: minAgeSeconds,
},
NodeFit: nodeFit,
},
}
}
tests := []struct {
description string
nodes []*v1.Node
strategy api.DeschedulerStrategy
expectedEvictedPodCount int
pods []v1.Pod
}{
{
description: "default empty strategy, 0 failures, 0 evictions",
strategy: api.DeschedulerStrategy{},
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []v1.Pod{}, // no pods come back with field selector phase=Failed
},
{
description: "0 failures, 0 evictions",
strategy: createStrategy(true, false, nil, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []v1.Pod{}, // no pods come back with field selector phase=Failed
},
{
description: "1 container terminated with reason NodeAffinity, 1 eviction",
strategy: createStrategy(true, false, nil, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []v1.Pod{
buildTestPod("p1", "node1", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}),
},
},
{
description: "1 init container terminated with reason NodeAffinity, 1 eviction",
strategy: createStrategy(true, true, nil, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []v1.Pod{
buildTestPod("p1", "node1", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}, nil),
},
},
{
description: "1 init container waiting with reason CreateContainerConfigError, 1 eviction",
strategy: createStrategy(true, true, nil, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []v1.Pod{
buildTestPod("p1", "node1", &v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"},
}, nil),
},
},
{
description: "2 init container waiting with reason CreateContainerConfigError, 2 nodes, 2 evictions",
strategy: createStrategy(true, true, nil, nil, nil, false),
nodes: []*v1.Node{
test.BuildTestNode("node1", 2000, 3000, 10, nil),
test.BuildTestNode("node2", 2000, 3000, 10, nil),
},
expectedEvictedPodCount: 2,
pods: []v1.Pod{
buildTestPod("p1", "node1", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}, nil),
buildTestPod("p2", "node2", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}, nil),
},
},
{
description: "include reason=CreateContainerConfigError, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []v1.Pod{
buildTestPod("p1", "node1", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}),
},
},
{
description: "include reason=CreateContainerConfigError+NodeAffinity, 1 container terminated with reason CreateContainerConfigError, 1 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError", "NodeAffinity"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []v1.Pod{
buildTestPod("p1", "node1", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "CreateContainerConfigError"},
}),
},
},
{
description: "include reason=CreateContainerConfigError, 1 container terminated with reason NodeAffinity, 0 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []v1.Pod{
buildTestPod("p1", "node1", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}),
},
},
{
description: "include init container=false, 1 init container waiting with reason CreateContainerConfigError, 0 eviction",
strategy: createStrategy(true, false, []string{"CreateContainerConfigError"}, nil, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []v1.Pod{
buildTestPod("p1", "node1", &v1.ContainerState{
Waiting: &v1.ContainerStateWaiting{Reason: "CreateContainerConfigError"},
}, nil),
},
},
{
description: "lifetime 1 hour, 1 container terminated with reason NodeAffinity, 0 eviction",
strategy: createStrategy(true, false, nil, nil, &OneHourInSeconds, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []v1.Pod{
buildTestPod("p1", "node1", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}),
},
},
{
description: "nodeFit=true, 1 unschedulable node, 1 container terminated with reason NodeAffinity, 0 eviction",
strategy: createStrategy(true, false, nil, nil, nil, true),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, func(node *v1.Node) {
node.Spec.Unschedulable = true
})},
expectedEvictedPodCount: 0,
pods: []v1.Pod{
buildTestPod("p1", "node1", nil, &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}),
},
},
{
description: "excluded owner kind=ReplicaSet, 1 init container terminated with owner kind=ReplicaSet, 0 eviction",
strategy: createStrategy(true, true, nil, []string{"ReplicaSet"}, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 0,
pods: []v1.Pod{
buildTestPod("p1", "node1", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}, nil),
},
},
{
description: "excluded owner kind=DaemonSet, 1 init container terminated with owner kind=ReplicaSet, 1 eviction",
strategy: createStrategy(true, true, nil, []string{"DaemonSet"}, nil, false),
nodes: []*v1.Node{test.BuildTestNode("node1", 2000, 3000, 10, nil)},
expectedEvictedPodCount: 1,
pods: []v1.Pod{
buildTestPod("p1", "node1", &v1.ContainerState{
Terminated: &v1.ContainerStateTerminated{Reason: "NodeAffinity"},
}, nil),
},
},
}
for _, tc := range tests {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
return true, &v1.PodList{Items: tc.pods}, nil
})
podEvictor := evictions.NewPodEvictor(
fakeClient,
policyv1.SchemeGroupVersion.String(),
false,
100,
tc.nodes,
false,
false,
false,
)
RemoveFailedPods(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)
}
}
}
func TestValidRemoveFailedPodsParams(t *testing.T) {
ctx := context.Background()
fakeClient := &fake.Clientset{}
testCases := []struct {
name string
params *api.StrategyParameters
}{
{name: "validate nil params", params: nil},
{name: "validate empty params", params: &api.StrategyParameters{}},
{name: "validate reasons params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
Reasons: []string{"CreateContainerConfigError"},
}}},
{name: "validate includingInitContainers params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
IncludingInitContainers: true,
}}},
{name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
ExcludeOwnerKinds: []string{"Job"},
}}},
{name: "validate excludeOwnerKinds params", params: &api.StrategyParameters{FailedPods: &api.FailedPods{
MinPodLifetimeSeconds: &OneHourInSeconds,
}}},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
t.Parallel()
params, err := validateAndParseRemoveFailedPodsParams(ctx, fakeClient, tc.params)
if err != nil {
t.Errorf("strategy params should be valid but got err: %v", err.Error())
}
if params == nil {
t.Errorf("strategy params should return a ValidatedFailedPodsStrategyParams but got nil")
}
})
}
}
func buildTestPod(podName, nodeName string, initContainerState, containerState *v1.ContainerState) v1.Pod {
pod := test.BuildTestPod(podName, 1, 1, nodeName, func(p *v1.Pod) {
ps := v1.PodStatus{}
if initContainerState != nil {
ps.InitContainerStatuses = []v1.ContainerStatus{{State: *initContainerState}}
ps.Phase = v1.PodFailed
}
if containerState != nil {
ps.ContainerStatuses = []v1.ContainerStatus{{State: *containerState}}
ps.Phase = v1.PodFailed
}
p.Status = ps
})
pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList()
pod.ObjectMeta.SetCreationTimestamp(metav1.Now())
return *pod
}

View File

@@ -1,432 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"context"
"sort"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
type NodeUsageMap struct {
node *v1.Node
usage api.ResourceThresholds
allPods []*v1.Pod
}
type NodePodsMap map[*v1.Node][]*v1.Pod
func LowNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
if !strategy.Enabled {
return
}
// todo: move to config validation?
// TODO: May be create a struct for the strategy as well, so that we don't have to pass along the all the params?
if strategy.Params.NodeResourceUtilizationThresholds == nil {
klog.V(1).Infof("NodeResourceUtilizationThresholds not set")
return
}
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
if !validateThresholds(thresholds) {
return
}
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if !validateTargetThresholds(targetThresholds) {
return
}
npm := createNodePodsMap(ctx, client, nodes)
lowNodes, targetNodes := classifyNodes(npm, thresholds, targetThresholds, evictLocalStoragePods)
klog.V(1).Infof("Criteria for a node under utilization: CPU: %v, Mem: %v, Pods: %v",
thresholds[v1.ResourceCPU], thresholds[v1.ResourceMemory], thresholds[v1.ResourcePods])
if len(lowNodes) == 0 {
klog.V(1).Infof("No node is underutilized, nothing to do here, you might tune your thresholds further")
return
}
klog.V(1).Infof("Total number of underutilized nodes: %v", len(lowNodes))
if len(lowNodes) < strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
klog.V(1).Infof("number of nodes underutilized (%v) is less than NumberOfNodes (%v), nothing to do here", len(lowNodes), strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
return
}
if len(lowNodes) == len(nodes) {
klog.V(1).Infof("all nodes are underutilized, nothing to do here")
return
}
if len(targetNodes) == 0 {
klog.V(1).Infof("all nodes are under target utilization, nothing to do here")
return
}
klog.V(1).Infof("Criteria for a node above target utilization: CPU: %v, Mem: %v, Pods: %v",
targetThresholds[v1.ResourceCPU], targetThresholds[v1.ResourceMemory], targetThresholds[v1.ResourcePods])
klog.V(1).Infof("Total number of nodes above target utilization: %v", len(targetNodes))
evictPodsFromTargetNodes(
ctx,
targetNodes,
lowNodes,
targetThresholds,
evictLocalStoragePods,
podEvictor)
klog.V(1).Infof("Total number of pods evicted: %v", podEvictor.TotalEvicted())
}
func validateThresholds(thresholds api.ResourceThresholds) bool {
if thresholds == nil || len(thresholds) == 0 {
klog.V(1).Infof("no resource threshold is configured")
return false
}
for name := range thresholds {
switch name {
case v1.ResourceCPU:
continue
case v1.ResourceMemory:
continue
case v1.ResourcePods:
continue
default:
klog.Errorf("only cpu, memory, or pods thresholds can be specified")
return false
}
}
return true
}
//This function could be merged into above once we are clear.
func validateTargetThresholds(targetThresholds api.ResourceThresholds) bool {
if targetThresholds == nil {
klog.V(1).Infof("no target resource threshold is configured")
return false
} else if _, ok := targetThresholds[v1.ResourcePods]; !ok {
klog.V(1).Infof("no target resource threshold for pods is configured")
return false
}
return true
}
// classifyNodes classifies the nodes into low-utilization or high-utilization nodes. If a node lies between
// low and high thresholds, it is simply ignored.
func classifyNodes(npm NodePodsMap, thresholds api.ResourceThresholds, targetThresholds api.ResourceThresholds, evictLocalStoragePods bool) ([]NodeUsageMap, []NodeUsageMap) {
lowNodes, targetNodes := []NodeUsageMap{}, []NodeUsageMap{}
for node, pods := range npm {
usage := nodeUtilization(node, pods, evictLocalStoragePods)
nuMap := NodeUsageMap{
node: node,
usage: usage,
allPods: pods,
}
// Check if node is underutilized and if we can schedule pods on it.
if !nodeutil.IsNodeUnschedulable(node) && IsNodeWithLowUtilization(usage, thresholds) {
klog.V(2).Infof("Node %#v is under utilized with usage: %#v", node.Name, usage)
lowNodes = append(lowNodes, nuMap)
} else if IsNodeAboveTargetUtilization(usage, targetThresholds) {
klog.V(2).Infof("Node %#v is over utilized with usage: %#v", node.Name, usage)
targetNodes = append(targetNodes, nuMap)
} else {
klog.V(2).Infof("Node %#v is appropriately utilized with usage: %#v", node.Name, usage)
}
}
return lowNodes, targetNodes
}
// evictPodsFromTargetNodes evicts pods based on priority, if all the pods on the node have priority, if not
// evicts them based on QoS as fallback option.
// TODO: @ravig Break this function into smaller functions.
func evictPodsFromTargetNodes(
ctx context.Context,
targetNodes, lowNodes []NodeUsageMap,
targetThresholds api.ResourceThresholds,
evictLocalStoragePods bool,
podEvictor *evictions.PodEvictor,
) {
SortNodesByUsage(targetNodes)
// upper bound on total number of pods/cpu/memory to be moved
var totalPods, totalCPU, totalMem float64
var taintsOfLowNodes = make(map[string][]v1.Taint, len(lowNodes))
for _, node := range lowNodes {
taintsOfLowNodes[node.node.Name] = node.node.Spec.Taints
nodeCapacity := node.node.Status.Capacity
if len(node.node.Status.Allocatable) > 0 {
nodeCapacity = node.node.Status.Allocatable
}
// totalPods to be moved
podsPercentage := targetThresholds[v1.ResourcePods] - node.usage[v1.ResourcePods]
totalPods += ((float64(podsPercentage) * float64(nodeCapacity.Pods().Value())) / 100)
// totalCPU capacity to be moved
if _, ok := targetThresholds[v1.ResourceCPU]; ok {
cpuPercentage := targetThresholds[v1.ResourceCPU] - node.usage[v1.ResourceCPU]
totalCPU += ((float64(cpuPercentage) * float64(nodeCapacity.Cpu().MilliValue())) / 100)
}
// totalMem capacity to be moved
if _, ok := targetThresholds[v1.ResourceMemory]; ok {
memPercentage := targetThresholds[v1.ResourceMemory] - node.usage[v1.ResourceMemory]
totalMem += ((float64(memPercentage) * float64(nodeCapacity.Memory().Value())) / 100)
}
}
klog.V(1).Infof("Total capacity to be moved: CPU:%v, Mem:%v, Pods:%v", totalCPU, totalMem, totalPods)
klog.V(1).Infof("********Number of pods evicted from each node:***********")
for _, node := range targetNodes {
nodeCapacity := node.node.Status.Capacity
if len(node.node.Status.Allocatable) > 0 {
nodeCapacity = node.node.Status.Allocatable
}
klog.V(3).Infof("evicting pods from node %#v with usage: %#v", node.node.Name, node.usage)
nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods := classifyPods(node.allPods, evictLocalStoragePods)
klog.V(2).Infof("allPods:%v, nonRemovablePods:%v, bestEffortPods:%v, burstablePods:%v, guaranteedPods:%v", len(node.allPods), len(nonRemovablePods), len(bestEffortPods), len(burstablePods), len(guaranteedPods))
// Check if one pod has priority, if yes, assume that all pods have priority and evict pods based on priority.
if node.allPods[0].Spec.Priority != nil {
klog.V(1).Infof("All pods have priority associated with them. Evicting pods based on priority")
evictablePods := make([]*v1.Pod, 0)
evictablePods = append(append(burstablePods, bestEffortPods...), guaranteedPods...)
// sort the evictable Pods based on priority. This also sorts them based on QoS. If there are multiple pods with same priority, they are sorted based on QoS tiers.
sortPodsBasedOnPriority(evictablePods)
evictPods(ctx, evictablePods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
} else {
// TODO: Remove this when we support only priority.
// Falling back to evicting pods based on priority.
klog.V(1).Infof("Evicting pods based on QoS")
klog.V(1).Infof("There are %v non-evictable pods on the node", len(nonRemovablePods))
// evict best effort pods
evictPods(ctx, bestEffortPods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
// evict burstable pods
evictPods(ctx, burstablePods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
// evict guaranteed pods
evictPods(ctx, guaranteedPods, targetThresholds, nodeCapacity, node.usage, &totalPods, &totalCPU, &totalMem, taintsOfLowNodes, podEvictor, node.node)
}
klog.V(1).Infof("%v pods evicted from node %#v with usage %v", podEvictor.NodeEvicted(node.node), node.node.Name, node.usage)
}
}
func evictPods(
ctx context.Context,
inputPods []*v1.Pod,
targetThresholds api.ResourceThresholds,
nodeCapacity v1.ResourceList,
nodeUsage api.ResourceThresholds,
totalPods *float64,
totalCPU *float64,
totalMem *float64,
taintsOfLowNodes map[string][]v1.Taint,
podEvictor *evictions.PodEvictor,
node *v1.Node) {
if IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) && (*totalPods > 0 || *totalCPU > 0 || *totalMem > 0) {
onePodPercentage := api.Percentage((float64(1) * 100) / float64(nodeCapacity.Pods().Value()))
for _, pod := range inputPods {
if !utils.PodToleratesTaints(pod, taintsOfLowNodes) {
klog.V(3).Infof("Skipping eviction for Pod: %#v, doesn't tolerate node taint", pod.Name)
continue
}
cUsage := utils.GetResourceRequest(pod, v1.ResourceCPU)
mUsage := utils.GetResourceRequest(pod, v1.ResourceMemory)
success, err := podEvictor.EvictPod(ctx, pod, node)
if err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
break
}
if success {
klog.V(3).Infof("Evicted pod: %#v", pod.Name)
// update remaining pods
nodeUsage[v1.ResourcePods] -= onePodPercentage
*totalPods--
// update remaining cpu
*totalCPU -= float64(cUsage)
nodeUsage[v1.ResourceCPU] -= api.Percentage((float64(cUsage) * 100) / float64(nodeCapacity.Cpu().MilliValue()))
// update remaining memory
*totalMem -= float64(mUsage)
nodeUsage[v1.ResourceMemory] -= api.Percentage(float64(mUsage) / float64(nodeCapacity.Memory().Value()) * 100)
klog.V(3).Infof("updated node usage: %#v", nodeUsage)
// check if node utilization drops below target threshold or required capacity (cpu, memory, pods) is moved
if !IsNodeAboveTargetUtilization(nodeUsage, targetThresholds) || (*totalPods <= 0 && *totalCPU <= 0 && *totalMem <= 0) {
break
}
}
}
}
}
func SortNodesByUsage(nodes []NodeUsageMap) {
sort.Slice(nodes, func(i, j int) bool {
var ti, tj api.Percentage
for name, value := range nodes[i].usage {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
ti += value
}
}
for name, value := range nodes[j].usage {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
tj += value
}
}
// To return sorted in descending order
return ti > tj
})
}
// sortPodsBasedOnPriority sorts pods based on priority and if their priorities are equal, they are sorted based on QoS tiers.
func sortPodsBasedOnPriority(evictablePods []*v1.Pod) {
sort.Slice(evictablePods, func(i, j int) bool {
if evictablePods[i].Spec.Priority == nil && evictablePods[j].Spec.Priority != nil {
return true
}
if evictablePods[j].Spec.Priority == nil && evictablePods[i].Spec.Priority != nil {
return false
}
if (evictablePods[j].Spec.Priority == nil && evictablePods[i].Spec.Priority == nil) || (*evictablePods[i].Spec.Priority == *evictablePods[j].Spec.Priority) {
if podutil.IsBestEffortPod(evictablePods[i]) {
return true
}
if podutil.IsBurstablePod(evictablePods[i]) && podutil.IsGuaranteedPod(evictablePods[j]) {
return true
}
return false
}
return *evictablePods[i].Spec.Priority < *evictablePods[j].Spec.Priority
})
}
// createNodePodsMap returns nodepodsmap with evictable pods on node.
func createNodePodsMap(ctx context.Context, client clientset.Interface, nodes []*v1.Node) NodePodsMap {
npm := NodePodsMap{}
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(ctx, client, node)
if err != nil {
klog.Warningf("node %s will not be processed, error in accessing its pods (%#v)", node.Name, err)
} else {
npm[node] = pods
}
}
return npm
}
func IsNodeAboveTargetUtilization(nodeThresholds api.ResourceThresholds, thresholds api.ResourceThresholds) bool {
for name, nodeValue := range nodeThresholds {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
if value, ok := thresholds[name]; !ok {
continue
} else if nodeValue > value {
return true
}
}
}
return false
}
func IsNodeWithLowUtilization(nodeThresholds api.ResourceThresholds, thresholds api.ResourceThresholds) bool {
for name, nodeValue := range nodeThresholds {
if name == v1.ResourceCPU || name == v1.ResourceMemory || name == v1.ResourcePods {
if value, ok := thresholds[name]; !ok {
continue
} else if nodeValue > value {
return false
}
}
}
return true
}
func nodeUtilization(node *v1.Node, pods []*v1.Pod, evictLocalStoragePods bool) api.ResourceThresholds {
totalReqs := map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: {},
v1.ResourceMemory: {},
}
for _, pod := range pods {
req, _ := utils.PodRequestsAndLimits(pod)
for name, quantity := range req {
if name == v1.ResourceCPU || name == v1.ResourceMemory {
// As Quantity.Add says: Add adds the provided y quantity to the current value. If the current value is zero,
// the format of the quantity will be updated to the format of y.
totalReqs[name].Add(quantity)
}
}
}
nodeCapacity := node.Status.Capacity
if len(node.Status.Allocatable) > 0 {
nodeCapacity = node.Status.Allocatable
}
totalPods := len(pods)
return api.ResourceThresholds{
v1.ResourceCPU: api.Percentage((float64(totalReqs[v1.ResourceCPU].MilliValue()) * 100) / float64(nodeCapacity.Cpu().MilliValue())),
v1.ResourceMemory: api.Percentage(float64(totalReqs[v1.ResourceMemory].Value()) / float64(nodeCapacity.Memory().Value()) * 100),
v1.ResourcePods: api.Percentage((float64(totalPods) * 100) / float64(nodeCapacity.Pods().Value())),
}
}
func classifyPods(pods []*v1.Pod, evictLocalStoragePods bool) ([]*v1.Pod, []*v1.Pod, []*v1.Pod, []*v1.Pod) {
var nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods []*v1.Pod
// From https://kubernetes.io/docs/tasks/configure-pod-container/quality-service-pod/
//
// For a Pod to be given a QoS class of Guaranteed:
// - every Container in the Pod must have a memory limit and a memory request, and they must be the same.
// - every Container in the Pod must have a CPU limit and a CPU request, and they must be the same.
// A Pod is given a QoS class of Burstable if:
// - the Pod does not meet the criteria for QoS class Guaranteed.
// - at least one Container in the Pod has a memory or CPU request.
// For a Pod to be given a QoS class of BestEffort, the Containers in the Pod must not have any memory or CPU limits or requests.
for _, pod := range pods {
if !podutil.IsEvictable(pod, evictLocalStoragePods) {
nonRemovablePods = append(nonRemovablePods, pod)
continue
}
switch utils.GetPodQOS(pod) {
case v1.PodQOSGuaranteed:
guaranteedPods = append(guaranteedPods, pod)
case v1.PodQOSBurstable:
burstablePods = append(burstablePods, pod)
default: // alias v1.PodQOSBestEffort
bestEffortPods = append(bestEffortPods, pod)
}
}
return nonRemovablePods, bestEffortPods, burstablePods, guaranteedPods
}

View File

@@ -1,642 +0,0 @@
/*
Copyright 2017 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package strategies
import (
"context"
"fmt"
"strings"
"testing"
"reflect"
v1 "k8s.io/api/core/v1"
"k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/fields"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/apimachinery/pkg/runtime/schema"
serializer "k8s.io/apimachinery/pkg/runtime/serializer"
"k8s.io/apimachinery/pkg/watch"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
var (
lowPriority = int32(0)
highPriority = int32(10000)
)
func setRSOwnerRef(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetReplicaSetOwnerRefList() }
func setDSOwnerRef(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList() }
func setNormalOwnerRef(pod *v1.Pod) { pod.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList() }
func setHighPriority(pod *v1.Pod) { pod.Spec.Priority = &highPriority }
func setLowPriority(pod *v1.Pod) { pod.Spec.Priority = &lowPriority }
func setNodeUnschedulable(node *v1.Node) { node.Spec.Unschedulable = true }
func makeBestEffortPod(pod *v1.Pod) {
pod.Spec.Containers[0].Resources.Requests = nil
pod.Spec.Containers[0].Resources.Requests = nil
pod.Spec.Containers[0].Resources.Limits = nil
pod.Spec.Containers[0].Resources.Limits = nil
}
func makeBurstablePod(pod *v1.Pod) {
pod.Spec.Containers[0].Resources.Limits = nil
pod.Spec.Containers[0].Resources.Limits = nil
}
func makeGuaranteedPod(pod *v1.Pod) {
pod.Spec.Containers[0].Resources.Limits[v1.ResourceCPU] = pod.Spec.Containers[0].Resources.Requests[v1.ResourceCPU]
pod.Spec.Containers[0].Resources.Limits[v1.ResourceMemory] = pod.Spec.Containers[0].Resources.Requests[v1.ResourceMemory]
}
func TestLowNodeUtilization(t *testing.T) {
ctx := context.Background()
n1NodeName := "n1"
n2NodeName := "n2"
n3NodeName := "n3"
testCases := []struct {
name string
thresholds, targetThresholds api.ResourceThresholds
nodes map[string]*v1.Node
pods map[string]*v1.PodList
expectedPodsEvicted int
evictedPods []string
}{
{
name: "without priorities",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, setRSOwnerRef),
*test.BuildTestPod("p2", 400, 0, n1NodeName, setRSOwnerRef),
*test.BuildTestPod("p3", 400, 0, n1NodeName, setRSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n1NodeName, setRSOwnerRef),
*test.BuildTestPod("p5", 400, 0, n1NodeName, setRSOwnerRef),
// These won't be evicted.
*test.BuildTestPod("p6", 400, 0, n1NodeName, setDSOwnerRef),
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
setNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 0, n1NodeName, setRSOwnerRef),
},
},
n3NodeName: {},
},
expectedPodsEvicted: 3,
},
{
name: "with priorities",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
setHighPriority(pod)
}),
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
setHighPriority(pod)
}),
*test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
setHighPriority(pod)
}),
*test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
setHighPriority(pod)
}),
*test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
setLowPriority(pod)
}),
// These won't be evicted.
*test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
setDSOwnerRef(pod)
setHighPriority(pod)
}),
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
setNormalOwnerRef(pod)
setLowPriority(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 0, n1NodeName, setRSOwnerRef),
},
},
n3NodeName: {},
},
expectedPodsEvicted: 3,
},
{
name: "without priorities evicting best-effort pods only",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, setNodeUnschedulable),
},
// All pods are assumed to be burstable (test.BuildTestNode always sets both cpu/memory resource requests to some value)
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
makeBestEffortPod(pod)
}),
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
makeBestEffortPod(pod)
}),
*test.BuildTestPod("p3", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
}),
*test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
makeBestEffortPod(pod)
}),
*test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
setRSOwnerRef(pod)
makeBestEffortPod(pod)
}),
// These won't be evicted.
*test.BuildTestPod("p6", 400, 0, n1NodeName, func(pod *v1.Pod) {
setDSOwnerRef(pod)
}),
*test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
setNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p9", 400, 0, n1NodeName, setRSOwnerRef),
},
},
n3NodeName: {},
},
expectedPodsEvicted: 4,
evictedPods: []string{"p1", "p2", "p4", "p5"},
},
}
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldString := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldString, n1NodeName) {
return true, test.pods[n1NodeName], nil
}
if strings.Contains(fieldString, n2NodeName) {
return true, test.pods[n2NodeName], nil
}
if strings.Contains(fieldString, n3NodeName) {
return true, test.pods[n3NodeName], nil
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.GetAction)
if node, exists := test.nodes[getAction.GetName()]; exists {
return true, node, nil
}
return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
})
podsForEviction := make(map[string]struct{})
for _, pod := range test.evictedPods {
podsForEviction[pod] = struct{}{}
}
evictionFailed := false
if len(test.evictedPods) > 0 {
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.CreateAction)
obj := getAction.GetObject()
if eviction, ok := obj.(*v1beta1.Eviction); ok {
if _, exists := podsForEviction[eviction.Name]; exists {
return true, obj, nil
}
evictionFailed = true
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
}
return true, obj, nil
})
}
var nodes []*v1.Node
for _, node := range test.nodes {
nodes = append(nodes, node)
}
npm := createNodePodsMap(ctx, fakeClient, nodes)
lowNodes, targetNodes := classifyNodes(npm, test.thresholds, test.targetThresholds, false)
if len(lowNodes) != 1 {
t.Errorf("After ignoring unschedulable nodes, expected only one node to be under utilized.")
}
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
test.expectedPodsEvicted,
nodes,
)
evictPodsFromTargetNodes(ctx, targetNodes, lowNodes, test.targetThresholds, false, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if test.expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %#v pods to be evicted but %#v got evicted", test.expectedPodsEvicted, podsEvicted)
}
if evictionFailed {
t.Errorf("Pod evictions failed unexpectedly")
}
})
}
}
func TestSortPodsByPriority(t *testing.T) {
n1 := test.BuildTestNode("n1", 4000, 3000, 9, nil)
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, setLowPriority)
// BestEffort
p2 := test.BuildTestPod("p2", 400, 0, n1.Name, func(pod *v1.Pod) {
setHighPriority(pod)
makeBestEffortPod(pod)
})
// Burstable
p3 := test.BuildTestPod("p3", 400, 0, n1.Name, func(pod *v1.Pod) {
setHighPriority(pod)
makeBurstablePod(pod)
})
// Guaranteed
p4 := test.BuildTestPod("p4", 400, 100, n1.Name, func(pod *v1.Pod) {
setHighPriority(pod)
makeGuaranteedPod(pod)
})
// Best effort with nil priorities.
p5 := test.BuildTestPod("p5", 400, 100, n1.Name, makeBestEffortPod)
p5.Spec.Priority = nil
p6 := test.BuildTestPod("p6", 400, 100, n1.Name, makeGuaranteedPod)
p6.Spec.Priority = nil
podList := []*v1.Pod{p4, p3, p2, p1, p6, p5}
sortPodsBasedOnPriority(podList)
if !reflect.DeepEqual(podList[len(podList)-1], p4) {
t.Errorf("Expected last pod in sorted list to be %v which of highest priority and guaranteed but got %v", p4, podList[len(podList)-1])
}
}
func TestValidateThresholds(t *testing.T) {
tests := []struct {
name string
input api.ResourceThresholds
succeed bool
}{
{
name: "passing nil map for threshold",
input: nil,
succeed: false,
},
{
name: "passing no threshold",
input: api.ResourceThresholds{},
succeed: false,
},
{
name: "passing unsupported resource name",
input: api.ResourceThresholds{
v1.ResourceCPU: 40,
v1.ResourceStorage: 25.5,
},
succeed: false,
},
{
name: "passing invalid resource name",
input: api.ResourceThresholds{
v1.ResourceCPU: 40,
"coolResource": 42.0,
},
succeed: false,
},
{
name: "passing a valid threshold with cpu, memory and pods",
input: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 30,
v1.ResourcePods: 40,
},
succeed: true,
},
}
for _, test := range tests {
isValid := validateThresholds(test.input)
if isValid != test.succeed {
t.Errorf("expected validity of threshold: %#v\nto be %v but got %v instead", test.input, test.succeed, isValid)
}
}
}
func newFake(objects ...runtime.Object) *core.Fake {
scheme := runtime.NewScheme()
codecs := serializer.NewCodecFactory(scheme)
fake.AddToScheme(scheme)
o := core.NewObjectTracker(scheme, codecs.UniversalDecoder())
for _, obj := range objects {
if err := o.Add(obj); err != nil {
panic(err)
}
}
fakePtr := core.Fake{}
fakePtr.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
objs, err := o.List(
schema.GroupVersionResource{Group: "", Version: "v1", Resource: "pods"},
schema.GroupVersionKind{Group: "", Version: "v1", Kind: "Pod"},
action.GetNamespace(),
)
if err != nil {
return true, nil, err
}
obj := &v1.PodList{
Items: []v1.Pod{},
}
for _, pod := range objs.(*v1.PodList).Items {
podFieldSet := fields.Set(map[string]string{
"spec.nodeName": pod.Spec.NodeName,
"status.phase": string(pod.Status.Phase),
})
match := action.(core.ListAction).GetListRestrictions().Fields.Matches(podFieldSet)
if !match {
continue
}
obj.Items = append(obj.Items, *pod.DeepCopy())
}
return true, obj, nil
})
fakePtr.AddReactor("*", "*", core.ObjectReaction(o))
fakePtr.AddWatchReactor("*", core.DefaultWatchReactor(watch.NewFake(), nil))
return &fakePtr
}
func TestWithTaints(t *testing.T) {
ctx := context.Background()
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: api.StrategyParameters{
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
Thresholds: api.ResourceThresholds{
v1.ResourcePods: 20,
},
TargetThresholds: api.ResourceThresholds{
v1.ResourcePods: 70,
},
},
},
}
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 1000, 3000, 10, nil)
n3 := test.BuildTestNode("n3", 1000, 3000, 10, nil)
n3withTaints := n3.DeepCopy()
n3withTaints.Spec.Taints = []v1.Taint{
{
Key: "key",
Value: "value",
Effect: v1.TaintEffectNoSchedule,
},
}
podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name, setRSOwnerRef)
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
{
Key: "key",
Value: "value",
},
}
tests := []struct {
name string
nodes []*v1.Node
pods []*v1.Pod
evictionsExpected int
}{
{
name: "No taints",
nodes: []*v1.Node{n1, n2, n3},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
// Node 2 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n2.Name), 200, 0, n2.Name, setRSOwnerRef),
},
evictionsExpected: 1,
},
{
name: "No pod tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_8_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, setRSOwnerRef),
},
evictionsExpected: 0,
},
{
name: "Pod which tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_5_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_6_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_7_%s", n1.Name), 200, 0, n1.Name, setRSOwnerRef),
podThatToleratesTaint,
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, setRSOwnerRef),
},
evictionsExpected: 1,
},
}
for _, item := range tests {
t.Run(item.name, func(t *testing.T) {
var objs []runtime.Object
for _, node := range item.nodes {
objs = append(objs, node)
}
for _, pod := range item.pods {
objs = append(objs, pod)
}
fakePtr := newFake(objs...)
var evictionCounter int
fakePtr.PrependReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
if action.GetSubresource() != "eviction" || action.GetResource().Resource != "pods" {
return false, nil, nil
}
evictionCounter++
return true, nil, nil
})
podEvictor := evictions.NewPodEvictor(
&fake.Clientset{Fake: *fakePtr},
"policy/v1",
false,
item.evictionsExpected,
item.nodes,
)
LowNodeUtilization(ctx, &fake.Clientset{Fake: *fakePtr}, strategy, item.nodes, false, podEvictor)
if item.evictionsExpected != evictionCounter {
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, evictionCounter)
}
})
}
}

View File

@@ -18,46 +18,96 @@ package strategies
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
func validatePodsViolatingNodeAffinityParams(params *api.StrategyParameters) error {
if params == nil || len(params.NodeAffinityType) == 0 {
return fmt.Errorf("NodeAffinityType is empty")
}
// At most one of include/exclude can be set
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// RemovePodsViolatingNodeAffinity evicts pods on nodes which violate node affinity
func RemovePodsViolatingNodeAffinity(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
if err := validatePodsViolatingNodeAffinityParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid RemovePodsViolatingNodeAffinity parameters")
return
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
var includedNamespaces, excludedNamespaces []string
if strategy.Params.Namespaces != nil {
includedNamespaces = strategy.Params.Namespaces.Include
excludedNamespaces = strategy.Params.Namespaces.Exclude
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
for _, nodeAffinity := range strategy.Params.NodeAffinityType {
klog.V(2).Infof("Executing for nodeAffinityType: %v", nodeAffinity)
klog.V(2).InfoS("Executing for nodeAffinityType", "nodeAffinity", nodeAffinity)
switch nodeAffinity {
case "requiredDuringSchedulingIgnoredDuringExecution":
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v\n", node.Name)
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods)
pods, err := podutil.ListPodsOnANode(
ctx,
client,
node,
podutil.WithFilter(func(pod *v1.Pod) bool {
return evictable.IsEvictable(pod) &&
!nodeutil.PodFitsCurrentNode(pod, node) &&
nodeutil.PodFitsAnyNode(pod, nodes)
}),
podutil.WithNamespaces(includedNamespaces),
podutil.WithoutNamespaces(excludedNamespaces),
podutil.WithLabelSelector(strategy.Params.LabelSelector),
)
if err != nil {
klog.Errorf("failed to get pods from %v: %v", node.Name, err)
klog.ErrorS(err, "Failed to get pods", "node", klog.KObj(node))
}
for _, pod := range pods {
if pod.Spec.Affinity != nil && pod.Spec.Affinity.NodeAffinity != nil && pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution != nil {
if !nodeutil.PodFitsCurrentNode(pod, node) && nodeutil.PodFitsAnyNode(pod, nodes) {
klog.V(1).Infof("Evicting pod: %v", pod.Name)
if _, err := podEvictor.EvictPod(ctx, pod, node); err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
break
}
klog.V(1).InfoS("Evicting pod", "pod", klog.KObj(pod))
if _, err := podEvictor.EvictPod(ctx, pod, node, "NodeAffinity"); err != nil {
klog.ErrorS(err, "Error evicting pod")
break
}
}
}
}
default:
klog.Errorf("invalid nodeAffinityType: %v", nodeAffinity)
klog.ErrorS(nil, "Invalid nodeAffinityType", "nodeAffinity", nodeAffinity)
}
}
klog.V(1).Infof("Evicted %v pods", podEvictor.TotalEvicted())
}

View File

@@ -20,7 +20,8 @@ import (
"context"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
@@ -33,13 +34,23 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
ctx := context.Background()
requiredDuringSchedulingIgnoredDuringExecutionStrategy := api.DeschedulerStrategy{
Enabled: true,
Params: api.StrategyParameters{
Params: &api.StrategyParameters{
NodeAffinityType: []string{
"requiredDuringSchedulingIgnoredDuringExecution",
},
},
}
requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
NodeAffinityType: []string{
"requiredDuringSchedulingIgnoredDuringExecution",
},
NodeFit: true,
},
}
nodeLabelKey := "kubernetes.io/desiredNode"
nodeLabelValue := "yes"
nodeWithLabels := test.BuildTestNode("nodeWithLabels", 2000, 3000, 10, nil)
@@ -93,13 +104,13 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
pods []v1.Pod
strategy api.DeschedulerStrategy
expectedEvictedPodCount int
maxPodsToEvict int
maxPodsToEvictPerNode int
}{
{
description: "Invalid strategy type, should not evict any pods",
strategy: api.DeschedulerStrategy{
Enabled: true,
Params: api.StrategyParameters{
Params: &api.StrategyParameters{
NodeAffinityType: []string{
"requiredDuringSchedulingRequiredDuringExecution",
},
@@ -108,7 +119,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
expectedEvictedPodCount: 0,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxPodsToEvict: 0,
maxPodsToEvictPerNode: 0,
},
{
description: "Pod is correctly scheduled on node, no eviction expected",
@@ -116,7 +127,7 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
expectedEvictedPodCount: 0,
pods: addPodsToNode(nodeWithLabels),
nodes: []*v1.Node{nodeWithLabels},
maxPodsToEvict: 0,
maxPodsToEvictPerNode: 0,
},
{
description: "Pod is scheduled on node without matching labels, another schedulable node available, should be evicted",
@@ -124,23 +135,31 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxPodsToEvict: 0,
maxPodsToEvictPerNode: 0,
},
{
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvict set to 1, should not be evicted",
description: "Pod is scheduled on node without matching labels, another schedulable node available, maxPodsToEvictPerNode set to 1, should not be evicted",
expectedEvictedPodCount: 1,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithoutLabels, nodeWithLabels},
maxPodsToEvict: 1,
maxPodsToEvictPerNode: 1,
},
{
description: "Pod is scheduled on node without matching labels, but no node where pod fits is available, should not evict",
expectedEvictedPodCount: 0,
strategy: requiredDuringSchedulingIgnoredDuringExecutionStrategy,
strategy: requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithoutLabels, unschedulableNodeWithLabels},
maxPodsToEvict: 0,
maxPodsToEvictPerNode: 0,
},
{
description: "Pod is scheduled on node without matching labels, and node where pod fits is available, should evict",
expectedEvictedPodCount: 0,
strategy: requiredDuringSchedulingIgnoredDuringExecutionWithNodeFitStrategy,
pods: addPodsToNode(nodeWithoutLabels),
nodes: []*v1.Node{nodeWithLabels, unschedulableNodeWithLabels},
maxPodsToEvictPerNode: 1,
},
}
@@ -153,13 +172,16 @@ func TestRemovePodsViolatingNodeAffinity(t *testing.T) {
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
policyv1.SchemeGroupVersion.String(),
false,
tc.maxPodsToEvict,
tc.maxPodsToEvictPerNode,
tc.nodes,
false,
false,
false,
)
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, false, podEvictor)
RemovePodsViolatingNodeAffinity(ctx, fakeClient, tc.strategy, tc.nodes, podEvictor)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, expected %v pod evictions, but got %v pod evictions\n", tc.description, tc.expectedEvictedPodCount, actualEvictedPodCount)

View File

@@ -18,6 +18,7 @@ package strategies
import (
"context"
"fmt"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
@@ -25,15 +26,68 @@ import (
"sigs.k8s.io/descheduler/pkg/utils"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog"
"k8s.io/klog/v2"
)
func validateRemovePodsViolatingNodeTaintsParams(params *api.StrategyParameters) error {
if params == nil {
return nil
}
// At most one of include/exclude can be set
if params.Namespaces != nil && len(params.Namespaces.Include) > 0 && len(params.Namespaces.Exclude) > 0 {
return fmt.Errorf("only one of Include/Exclude namespaces can be set")
}
if params.ThresholdPriority != nil && params.ThresholdPriorityClassName != "" {
return fmt.Errorf("only one of thresholdPriority and thresholdPriorityClassName can be set")
}
return nil
}
// RemovePodsViolatingNodeTaints evicts pods on the node which violate NoSchedule Taints on nodes
func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, evictLocalStoragePods bool, podEvictor *evictions.PodEvictor) {
func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
if err := validateRemovePodsViolatingNodeTaintsParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid RemovePodsViolatingNodeTaints parameters")
return
}
var includedNamespaces, excludedNamespaces []string
var labelSelector *metav1.LabelSelector
if strategy.Params != nil {
if strategy.Params.Namespaces != nil {
includedNamespaces = strategy.Params.Namespaces.Include
excludedNamespaces = strategy.Params.Namespaces.Exclude
}
labelSelector = strategy.Params.LabelSelector
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
for _, node := range nodes {
klog.V(1).Infof("Processing node: %#v\n", node.Name)
pods, err := podutil.ListEvictablePodsOnNode(ctx, client, node, evictLocalStoragePods)
klog.V(1).InfoS("Processing node", "node", klog.KObj(node))
pods, err := podutil.ListPodsOnANode(
ctx,
client,
node,
podutil.WithFilter(evictable.IsEvictable),
podutil.WithNamespaces(includedNamespaces),
podutil.WithoutNamespaces(excludedNamespaces),
podutil.WithLabelSelector(labelSelector),
)
if err != nil {
//no pods evicted as error encountered retrieving evictable Pods
return
@@ -45,9 +99,9 @@ func RemovePodsViolatingNodeTaints(ctx context.Context, client clientset.Interfa
node.Spec.Taints,
func(taint *v1.Taint) bool { return taint.Effect == v1.TaintEffectNoSchedule },
) {
klog.V(2).Infof("Not all taints with NoSchedule effect are tolerated after update for pod %v on node %v", pods[i].Name, node.Name)
if _, err := podEvictor.EvictPod(ctx, pods[i], node); err != nil {
klog.Errorf("Error evicting pod: (%#v)", err)
klog.V(2).InfoS("Not all taints with NoSchedule effect are tolerated after update for pod on node", "pod", klog.KObj(pods[i]), "node", klog.KObj(node))
if _, err := podEvictor.EvictPod(ctx, pods[i], node, "NodeTaint"); err != nil {
klog.ErrorS(err, "Error evicting pod")
break
}
}

View File

@@ -5,7 +5,8 @@ import (
"fmt"
"testing"
"k8s.io/api/core/v1"
v1 "k8s.io/api/core/v1"
policyv1 "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
@@ -50,6 +51,17 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
node1 = addTaintsToNode(node2, "testingTaint", "testing", []int{1})
node3 := test.BuildTestNode("n3", 2000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
"datacenter": "east",
}
})
node4 := test.BuildTestNode("n4", 2000, 3000, 10, func(node *v1.Node) {
node.Spec = v1.NodeSpec{
Unschedulable: true,
}
})
p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
@@ -69,12 +81,15 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
p10 := test.BuildTestPod("p10", 100, 0, node2.Name, nil)
p11 := test.BuildTestPod("p11", 100, 0, node2.Name, nil)
p11.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
p12 := test.BuildTestPod("p11", 100, 0, node2.Name, nil)
p12.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
// The following 4 pods won't get evicted.
// A Critical Pod.
p7.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
p7.Spec.Priority = &priority
p7.ObjectMeta.OwnerReferences = test.GetNormalPodOwnerRefList()
// A daemonset.
p8.ObjectMeta.OwnerReferences = test.GetDaemonSetOwnerRefList()
@@ -97,13 +112,19 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
p3 = addTolerationToPod(p3, "testTaint", "test", 1)
p4 = addTolerationToPod(p4, "testTaintX", "testX", 1)
p12.Spec.NodeSelector = map[string]string{
"datacenter": "west",
}
tests := []struct {
description string
nodes []*v1.Node
pods []v1.Pod
evictLocalStoragePods bool
maxPodsToEvict int
evictSystemCriticalPods bool
maxPodsToEvictPerNode int
expectedEvictedPodCount int
nodeFit bool
}{
{
@@ -111,7 +132,8 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
pods: []v1.Pod{*p1, *p2, *p3},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
maxPodsToEvict: 0,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 1, //p2 gets evicted
},
{
@@ -119,15 +141,17 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
pods: []v1.Pod{*p1, *p3, *p4},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
maxPodsToEvict: 0,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 1, //p4 gets evicted
},
{
description: "Only <maxPodsToEvict> number of Pods not tolerating node taint should be evicted",
description: "Only <maxPodsToEvictPerNode> number of Pods not tolerating node taint should be evicted",
pods: []v1.Pod{*p1, *p5, *p6},
nodes: []*v1.Node{node1},
evictLocalStoragePods: false,
maxPodsToEvict: 1,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 1,
expectedEvictedPodCount: 1, //p5 or p6 gets evicted
},
{
@@ -135,24 +159,66 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
pods: []v1.Pod{*p7, *p8, *p9, *p10},
nodes: []*v1.Node{node2},
evictLocalStoragePods: false,
maxPodsToEvict: 0,
expectedEvictedPodCount: 0,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 0, //nothing is evicted
},
{
description: "Critical pods except storage pods not tolerating node taint should not be evicted",
pods: []v1.Pod{*p7, *p8, *p9, *p10},
nodes: []*v1.Node{node2},
evictLocalStoragePods: true,
maxPodsToEvict: 0,
expectedEvictedPodCount: 1,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 1, //p9 gets evicted
},
{
description: "Critical and non critical pods, only non critical pods not tolerating node taint should be evicted",
pods: []v1.Pod{*p7, *p8, *p10, *p11},
nodes: []*v1.Node{node2},
evictLocalStoragePods: false,
maxPodsToEvict: 0,
expectedEvictedPodCount: 1,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 1, //p11 gets evicted
},
{
description: "Critical and non critical pods, pods not tolerating node taint should be evicted even if they are critical",
pods: []v1.Pod{*p2, *p7, *p9, *p10},
nodes: []*v1.Node{node2},
evictLocalStoragePods: false,
evictSystemCriticalPods: true,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 2, //p2 and p7 are evicted
},
{
description: "Pod p2 doesn't tolerate taint on it's node, but also doesn't tolerate taints on other nodes",
pods: []v1.Pod{*p1, *p2, *p3},
nodes: []*v1.Node{node1, node2},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 0, //p2 gets evicted
nodeFit: true,
},
{
description: "Pod p12 doesn't tolerate taint on it's node, but other nodes don't match it's selector",
pods: []v1.Pod{*p1, *p3, *p12},
nodes: []*v1.Node{node1, node3},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 0, //p2 gets evicted
nodeFit: true,
},
{
description: "Pod p2 doesn't tolerate taint on it's node, but other nodes are unschedulable",
pods: []v1.Pod{*p1, *p2, *p3},
nodes: []*v1.Node{node1, node4},
evictLocalStoragePods: false,
evictSystemCriticalPods: false,
maxPodsToEvictPerNode: 0,
expectedEvictedPodCount: 0, //p2 gets evicted
nodeFit: true,
},
}
@@ -166,13 +232,22 @@ func TestDeletePodsViolatingNodeTaints(t *testing.T) {
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
policyv1.SchemeGroupVersion.String(),
false,
tc.maxPodsToEvict,
tc.maxPodsToEvictPerNode,
tc.nodes,
tc.evictLocalStoragePods,
tc.evictSystemCriticalPods,
false,
)
RemovePodsViolatingNodeTaints(ctx, fakeClient, api.DeschedulerStrategy{}, tc.nodes, tc.evictLocalStoragePods, podEvictor)
strategy := api.DeschedulerStrategy{
Params: &api.StrategyParameters{
NodeFit: tc.nodeFit,
},
}
RemovePodsViolatingNodeTaints(ctx, fakeClient, strategy, tc.nodes, podEvictor)
actualEvictedPodCount := podEvictor.TotalEvicted()
if actualEvictedPodCount != tc.expectedEvictedPodCount {
t.Errorf("Test %#v failed, Unexpected no of pods evicted: pods evicted: %d, expected: %d", tc.description, actualEvictedPodCount, tc.expectedEvictedPodCount)

View File

@@ -0,0 +1,164 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
"sigs.k8s.io/descheduler/pkg/utils"
)
// HighNodeUtilization evicts pods from under utilized nodes so that scheduler can schedule according to its strategy.
// Note that CPU/Memory requests are used to calculate nodes' utilization and not the actual resource usage.
func HighNodeUtilization(ctx context.Context, client clientset.Interface, strategy api.DeschedulerStrategy, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
if err := validateNodeUtilizationParams(strategy.Params); err != nil {
klog.ErrorS(err, "Invalid HighNodeUtilization parameters")
return
}
nodeFit := false
if strategy.Params != nil {
nodeFit = strategy.Params.NodeFit
}
thresholdPriority, err := utils.GetPriorityFromStrategyParams(ctx, client, strategy.Params)
if err != nil {
klog.ErrorS(err, "Failed to get threshold priority from strategy's params")
return
}
thresholds := strategy.Params.NodeResourceUtilizationThresholds.Thresholds
targetThresholds := strategy.Params.NodeResourceUtilizationThresholds.TargetThresholds
if err := validateHighUtilizationStrategyConfig(thresholds, targetThresholds); err != nil {
klog.ErrorS(err, "HighNodeUtilization config is not valid")
return
}
targetThresholds = make(api.ResourceThresholds)
setDefaultForThresholds(thresholds, targetThresholds)
resourceNames := getResourceNames(targetThresholds)
sourceNodes, highNodes := classifyNodes(
getNodeUsage(ctx, client, nodes, thresholds, targetThresholds, resourceNames),
func(node *v1.Node, usage NodeUsage) bool {
return isNodeWithLowUtilization(usage)
},
func(node *v1.Node, usage NodeUsage) bool {
if nodeutil.IsNodeUnschedulable(node) {
klog.V(2).InfoS("Node is unschedulable", "node", klog.KObj(node))
return false
}
return !isNodeWithLowUtilization(usage)
})
// log message in one line
keysAndValues := []interface{}{
"CPU", thresholds[v1.ResourceCPU],
"Mem", thresholds[v1.ResourceMemory],
"Pods", thresholds[v1.ResourcePods],
}
for name := range thresholds {
if !isBasicResource(name) {
keysAndValues = append(keysAndValues, string(name), int64(thresholds[name]))
}
}
klog.V(1).InfoS("Criteria for a node below target utilization", keysAndValues...)
klog.V(1).InfoS("Number of underutilized nodes", "totalNumber", len(sourceNodes))
if len(sourceNodes) == 0 {
klog.V(1).InfoS("No node is underutilized, nothing to do here, you might tune your thresholds further")
return
}
if len(sourceNodes) <= strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes {
klog.V(1).InfoS("Number of nodes underutilized is less or equal than NumberOfNodes, nothing to do here", "underutilizedNodes", len(sourceNodes), "numberOfNodes", strategy.Params.NodeResourceUtilizationThresholds.NumberOfNodes)
return
}
if len(sourceNodes) == len(nodes) {
klog.V(1).InfoS("All nodes are underutilized, nothing to do here")
return
}
if len(highNodes) == 0 {
klog.V(1).InfoS("No node is available to schedule the pods, nothing to do here")
return
}
evictable := podEvictor.Evictable(evictions.WithPriorityThreshold(thresholdPriority), evictions.WithNodeFit(nodeFit))
// stop if the total available usage has dropped to zero - no more pods can be scheduled
continueEvictionCond := func(nodeUsage NodeUsage, totalAvailableUsage map[v1.ResourceName]*resource.Quantity) bool {
for name := range totalAvailableUsage {
if totalAvailableUsage[name].CmpInt64(0) < 1 {
return false
}
}
return true
}
evictPodsFromSourceNodes(
ctx,
sourceNodes,
highNodes,
podEvictor,
evictable.IsEvictable,
resourceNames,
"HighNodeUtilization",
continueEvictionCond)
}
func validateHighUtilizationStrategyConfig(thresholds, targetThresholds api.ResourceThresholds) error {
if targetThresholds != nil {
return fmt.Errorf("targetThresholds is not applicable for HighNodeUtilization")
}
if err := validateThresholds(thresholds); err != nil {
return fmt.Errorf("thresholds config is not valid: %v", err)
}
return nil
}
func setDefaultForThresholds(thresholds, targetThresholds api.ResourceThresholds) {
// check if Pods/CPU/Mem are set, if not, set them to 100
if _, ok := thresholds[v1.ResourcePods]; !ok {
thresholds[v1.ResourcePods] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceCPU]; !ok {
thresholds[v1.ResourceCPU] = MaxResourcePercentage
}
if _, ok := thresholds[v1.ResourceMemory]; !ok {
thresholds[v1.ResourceMemory] = MaxResourcePercentage
}
// Default targetThreshold resource values to 100
targetThresholds[v1.ResourcePods] = MaxResourcePercentage
targetThresholds[v1.ResourceCPU] = MaxResourcePercentage
targetThresholds[v1.ResourceMemory] = MaxResourcePercentage
for name := range thresholds {
if !isBasicResource(name) {
targetThresholds[name] = MaxResourcePercentage
}
}
}

View File

@@ -0,0 +1,784 @@
/*
Copyright 2021 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"context"
"fmt"
"strings"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/api/policy/v1beta1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
func TestHighNodeUtilization(t *testing.T) {
ctx := context.Background()
n1NodeName := "n1"
n2NodeName := "n2"
n3NodeName := "n3"
nodeSelectorKey := "datacenter"
nodeSelectorValue := "west"
testCases := []struct {
name string
thresholds api.ResourceThresholds
nodes map[string]*v1.Node
pods map[string]*v1.PodList
maxPodsToEvictPerNode int
expectedPodsEvicted int
evictedPods []string
}{
{
name: "no node below threshold usage",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourcePods: 20,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
},
},
n2NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p8", 400, 0, n3NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p9", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 0,
},
{
name: "no evictable pods",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 40,
v1.ResourcePods: 40,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, nil),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI)},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetDSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetDSOwnerRef),
},
},
n3NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p5", 400, 0, n3NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p6", 400, 0, n3NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p8", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 0,
},
{
name: "no node to schedule evicted pods",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourcePods: 20,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
// These can't be evicted.
*test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
},
},
n2NodeName: {
Items: []v1.Pod{
// These can't be evicted.
*test.BuildTestPod("p2", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p3", 400, 0, n3NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n3NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p5", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 0,
},
{
name: "without priorities",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
},
},
n2NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p7", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 2,
evictedPods: []string{"p1", "p7"},
},
{
name: "without priorities stop when resource capacity is depleted",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 2000, 3000, 10, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 2000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 2000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
},
},
n2NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p2", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p6", 400, 0, n3NodeName, test.SetRSOwnerRef),
},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 1,
},
{
name: "with priorities",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 10, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 2000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 2000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, lowPriority)
}),
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodPriority(pod, highPriority)
}),
},
},
n2NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p7", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p8", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p9", 400, 0, n3NodeName, test.SetDSOwnerRef),
},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 1,
evictedPods: []string{"p1"},
},
{
name: "without priorities evicting best-effort pods only",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 3000, 3000, 10, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 3000, 3000, 5, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 3000, 3000, 10, test.SetNodeUnschedulable),
},
// All pods are assumed to be burstable (test.BuildTestNode always sets both cpu/memory resource requests to some value)
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.MakeBestEffortPod(pod)
}),
*test.BuildTestPod("p2", 400, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
}),
},
},
n2NodeName: {
Items: []v1.Pod{
// These won't be evicted.
*test.BuildTestPod("p3", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p5", 400, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p6", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {
Items: []v1.Pod{},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 1,
evictedPods: []string{"p1"},
},
{
name: "with extended resource",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
extendedResource: 40,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 8)
}),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 8)
}),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
*test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
// These won't be evicted
*test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetDSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p3", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
*test.BuildTestPod("p4", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
*test.BuildTestPod("p5", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
*test.BuildTestPod("p6", 500, 0, n2NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
},
},
n3NodeName: {
Items: []v1.Pod{},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 2,
evictedPods: []string{"p1", "p2"},
},
{
name: "with extended resource in some of nodes",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 40,
extendedResource: 40,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 10, func(node *v1.Node) {
test.SetNodeExtendedResource(node, extendedResource, 8)
}),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
n3NodeName: test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
//These won't be evicted
*test.BuildTestPod("p1", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
*test.BuildTestPod("p2", 100, 0, n1NodeName, func(pod *v1.Pod) {
test.SetRSOwnerRef(pod)
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p3", 500, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 500, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p5", 500, 0, n2NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p6", 500, 0, n2NodeName, test.SetRSOwnerRef),
},
},
n3NodeName: {
Items: []v1.Pod{},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 0,
},
{
name: "Other node match pod node selector",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeSelectorKey: nodeSelectorValue,
}
}),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetDSOwnerRef),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p5", 400, 0, n2NodeName, func(pod *v1.Pod) {
// A pod selecting nodes in the "west" datacenter
test.SetRSOwnerRef(pod)
pod.Spec.NodeSelector = map[string]string{
nodeSelectorKey: nodeSelectorValue,
}
}),
},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 1,
},
{
name: "Other node does not match pod node selector",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
nodes: map[string]*v1.Node{
n1NodeName: test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
n2NodeName: test.BuildTestNode(n2NodeName, 4000, 3000, 10, nil),
},
pods: map[string]*v1.PodList{
n1NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
*test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetDSOwnerRef),
},
},
n2NodeName: {
Items: []v1.Pod{
*test.BuildTestPod("p5", 400, 0, n2NodeName, func(pod *v1.Pod) {
// A pod selecting nodes in the "west" datacenter
test.SetRSOwnerRef(pod)
pod.Spec.NodeSelector = map[string]string{
nodeSelectorKey: nodeSelectorValue,
}
}),
},
},
},
maxPodsToEvictPerNode: 0,
expectedPodsEvicted: 0,
},
}
for _, test := range testCases {
t.Run(test.name, func(t *testing.T) {
fakeClient := &fake.Clientset{}
fakeClient.Fake.AddReactor("list", "pods", func(action core.Action) (bool, runtime.Object, error) {
list := action.(core.ListAction)
fieldString := list.GetListRestrictions().Fields.String()
if strings.Contains(fieldString, n1NodeName) {
return true, test.pods[n1NodeName], nil
}
if strings.Contains(fieldString, n2NodeName) {
return true, test.pods[n2NodeName], nil
}
if strings.Contains(fieldString, n3NodeName) {
return true, test.pods[n3NodeName], nil
}
return true, nil, fmt.Errorf("Failed to list: %v", list)
})
fakeClient.Fake.AddReactor("get", "nodes", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.GetAction)
if node, exists := test.nodes[getAction.GetName()]; exists {
return true, node, nil
}
return true, nil, fmt.Errorf("Wrong node: %v", getAction.GetName())
})
podsForEviction := make(map[string]struct{})
for _, pod := range test.evictedPods {
podsForEviction[pod] = struct{}{}
}
evictionFailed := false
if len(test.evictedPods) > 0 {
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.CreateAction)
obj := getAction.GetObject()
if eviction, ok := obj.(*v1beta1.Eviction); ok {
if _, exists := podsForEviction[eviction.Name]; exists {
return true, obj, nil
}
evictionFailed = true
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
}
return true, obj, nil
})
}
var nodes []*v1.Node
for _, node := range test.nodes {
nodes = append(nodes, node)
}
podEvictor := evictions.NewPodEvictor(
fakeClient,
"v1",
false,
test.maxPodsToEvictPerNode,
nodes,
false,
false,
false,
)
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
Thresholds: test.thresholds,
},
NodeFit: true,
},
}
HighNodeUtilization(ctx, fakeClient, strategy, nodes, podEvictor)
podsEvicted := podEvictor.TotalEvicted()
if test.expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %#v pods to be evicted but %#v got evicted", test.expectedPodsEvicted, podsEvicted)
}
if evictionFailed {
t.Errorf("Pod evictions failed unexpectedly")
}
})
}
}
func TestValidateHighNodeUtilizationStrategyConfig(t *testing.T) {
tests := []struct {
name string
thresholds api.ResourceThresholds
targetThresholds api.ResourceThresholds
errInfo error
}{
{
name: "passing target thresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 20,
v1.ResourceMemory: 20,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: fmt.Errorf("targetThresholds is not applicable for HighNodeUtilization"),
},
{
name: "passing empty thresholds",
thresholds: api.ResourceThresholds{},
errInfo: fmt.Errorf("thresholds config is not valid: no resource threshold is configured"),
},
{
name: "passing invalid thresholds",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 120,
},
errInfo: fmt.Errorf("thresholds config is not valid: %v", fmt.Errorf(
"%v threshold not in [%v, %v] range", v1.ResourceMemory, MinResourcePercentage, MaxResourcePercentage)),
},
{
name: "passing valid strategy config",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
},
errInfo: nil,
},
{
name: "passing valid strategy config with extended resource",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 80,
v1.ResourceMemory: 80,
extendedResource: 80,
},
errInfo: nil,
},
}
for _, testCase := range tests {
validateErr := validateHighUtilizationStrategyConfig(testCase.thresholds, testCase.targetThresholds)
if validateErr == nil || testCase.errInfo == nil {
if validateErr != testCase.errInfo {
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
} else if validateErr.Error() != testCase.errInfo.Error() {
t.Errorf("expected validity of strategy config: thresholds %#v targetThresholds %#v to be %v but got %v instead",
testCase.thresholds, testCase.targetThresholds, testCase.errInfo, validateErr)
}
}
}
func TestHighNodeUtilizationWithTaints(t *testing.T) {
ctx := context.Background()
strategy := api.DeschedulerStrategy{
Enabled: true,
Params: &api.StrategyParameters{
NodeResourceUtilizationThresholds: &api.NodeResourceUtilizationThresholds{
Thresholds: api.ResourceThresholds{
v1.ResourceCPU: 40,
},
},
},
}
n1 := test.BuildTestNode("n1", 1000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 1000, 3000, 10, nil)
n3 := test.BuildTestNode("n3", 1000, 3000, 10, nil)
n3withTaints := n3.DeepCopy()
n3withTaints.Spec.Taints = []v1.Taint{
{
Key: "key",
Value: "value",
Effect: v1.TaintEffectNoSchedule,
},
}
podThatToleratesTaint := test.BuildTestPod("tolerate_pod", 200, 0, n1.Name, test.SetRSOwnerRef)
podThatToleratesTaint.Spec.Tolerations = []v1.Toleration{
{
Key: "key",
Value: "value",
},
}
tests := []struct {
name string
nodes []*v1.Node
pods []*v1.Pod
evictionsExpected int
}{
{
name: "No taints",
nodes: []*v1.Node{n1, n2, n3},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
test.BuildTestPod(fmt.Sprintf("pod_3_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
// Node 2 pods
test.BuildTestPod(fmt.Sprintf("pod_4_%s", n2.Name), 200, 0, n2.Name, test.SetRSOwnerRef),
},
evictionsExpected: 1,
},
{
name: "No pod tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 200, 0, n1.Name, test.SetRSOwnerRef),
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_2_%s", n3withTaints.Name), 200, 0, n3withTaints.Name, test.SetRSOwnerRef),
},
evictionsExpected: 0,
},
{
name: "Pod which tolerates node taint",
nodes: []*v1.Node{n1, n3withTaints},
pods: []*v1.Pod{
//Node 1 pods
test.BuildTestPod(fmt.Sprintf("pod_1_%s", n1.Name), 100, 0, n1.Name, test.SetRSOwnerRef),
podThatToleratesTaint,
// Node 3 pods
test.BuildTestPod(fmt.Sprintf("pod_9_%s", n3withTaints.Name), 500, 0, n3withTaints.Name, test.SetRSOwnerRef),
},
evictionsExpected: 1,
},
}
for _, item := range tests {
t.Run(item.name, func(t *testing.T) {
var objs []runtime.Object
for _, node := range item.nodes {
objs = append(objs, node)
}
for _, pod := range item.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
podEvictor := evictions.NewPodEvictor(
fakeClient,
"policy/v1",
false,
item.evictionsExpected,
item.nodes,
false,
false,
false,
)
HighNodeUtilization(ctx, fakeClient, strategy, item.nodes, podEvictor)
if item.evictionsExpected != podEvictor.TotalEvicted() {
t.Errorf("Expected %v evictions, got %v", item.evictionsExpected, podEvictor.TotalEvicted())
}
})
}
}

Some files were not shown because too many files have changed in this diff Show More