1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Compare commits

...

12 Commits

Author SHA1 Message Date
Kubernetes Prow Robot
eca09d470a Merge pull request #1427 from a7i/helm-v0.30.1-release
helm: upgrade to v0.30.1
2024-06-05 05:06:04 -07:00
Amir Alavi
ff2b9dc19f helm: upgrade to v0.30.1 2024-06-05 07:46:59 -04:00
Kubernetes Prow Robot
9f7e7fd5bb Merge pull request #1425 from a7i/automated-cherry-pick-of-#1378-#1390-#1412-#1413-#1416-#1395-upstream-release-1.30
Automated cherry pick of #1378: Fix the replicas type for the helm-chart
#1390: allow 'falsey' value in cmdOption
#1412: fix helm's default deschedulerPolicy
#1413: fix TOC location in Readme
#1416: use cmd context instead of using context.Background()
#1395: fix the issue that the pod anti-filtering rules are not
2024-06-05 02:43:20 -07:00
Hao Fan
7e85b79556 fix the issue that the pod anti-filtering rules are not taking effect 2024-06-05 00:03:33 -04:00
googs1025
8247f92fe0 use cmd context instead of using context.Background() 2024-06-05 00:03:33 -04:00
balazs.benyo
b1391edd2a fix TOC location in Readme 2024-06-05 00:03:32 -04:00
balazs.benyo
a861867022 fix helm's default deschedulerPolicy 2024-06-05 00:03:32 -04:00
Omer Aplatony
a09c4d2c61 Fixed options without value 2024-06-05 00:03:32 -04:00
Omer Aplatony
b33845c383 Added speaces 2024-06-05 00:03:31 -04:00
Omer Aplatony
6a930de272 allow 'falsey' value in cmdOption 2024-06-05 00:03:31 -04:00
Youqing Han
53a27209cf Convert the replicas value to int for comparision 2024-06-05 00:03:31 -04:00
Youqing Han
7182bcdc10 Fix the replicas type for the helm-chart 2024-06-05 00:03:31 -04:00
16 changed files with 149 additions and 85 deletions

View File

@@ -2,7 +2,7 @@
![Release Charts](https://github.com/kubernetes-sigs/descheduler/workflows/Release%20Charts/badge.svg)
<p align="left">
Click at the [bullet list icon] at the top left corner of the Readme visualization for the github generated table of contents.
↗️ Click at the [bullet list icon] at the top right corner of the Readme visualization for the github generated table of contents.
</p>
<p align="center">

View File

@@ -1,7 +1,7 @@
apiVersion: v1
name: descheduler
version: 0.30.0
appVersion: 0.30.0
version: 0.30.1
appVersion: 0.30.1
description: Descheduler for Kubernetes is used to rebalance clusters by evicting pods that can potentially be scheduled on better nodes. In the current implementation, descheduler does not schedule replacement of evicted pods but relies on the default scheduler for that.
keywords:
- kubernetes

View File

@@ -1,7 +1,7 @@
Descheduler installed as a {{ .Values.kind }}.
{{- if eq .Values.kind "Deployment" }}
{{- if eq .Values.replicas 1.0}}
{{- if eq (.Values.replicas | int) 1 }}
WARNING: You set replica count as 1 and workload kind as Deployment however leaderElection is not enabled. Consider enabling Leader Election for HA mode.
{{- end}}
{{- if .Values.leaderElection }}

View File

@@ -81,7 +81,11 @@ spec:
args:
- --policy-config-file=/policy-dir/policy.yaml
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key }}{{ if $value }}={{ $value }}{{ end }}
{{- if ne $value nil }}
- {{ printf "--%s=%s" $key (toString $value) }}
{{- else }}
- {{ printf "--%s" $key }}
{{- end }}
{{- end }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 16 }}

View File

@@ -7,7 +7,7 @@ metadata:
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
{{- if gt .Values.replicas 1.0}}
{{- if gt (.Values.replicas | int) 1 }}
{{- if not .Values.leaderElection.enabled }}
{{- fail "You must set leaderElection to use more than 1 replica"}}
{{- end}}
@@ -53,7 +53,11 @@ spec:
- --policy-config-file=/policy-dir/policy.yaml
- --descheduling-interval={{ required "deschedulingInterval required for running as Deployment" .Values.deschedulingInterval }}
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key }}{{ if $value }}={{ $value }}{{ end }}
{{- if ne $value nil }}
- {{ printf "--%s=%s" $key (toString $value) }}
{{- else }}
- {{ printf "--%s" $key }}
{{- end }}
{{- end }}
{{- include "descheduler.leaderElection" . | nindent 12 }}
ports:

View File

@@ -111,14 +111,13 @@ deschedulerPolicy:
args:
podRestartThreshold: 100
includingInitContainers: true
- name: RemovePodsViolatingNodeTaints
- name: RemovePodsViolatingNodeAffinity
args:
nodeAffinityType:
- requiredDuringSchedulingIgnoredDuringExecution
- requiredDuringSchedulingIgnoredDuringExecution
- name: RemovePodsViolatingNodeTaints
- name: RemovePodsViolatingInterPodAntiAffinity
- name: RemovePodsViolatingTopologySpreadConstraint
args:
includeSoftConstraints: false
- name: LowNodeUtilization
args:
thresholds:
@@ -133,7 +132,6 @@ deschedulerPolicy:
balance:
enabled:
- RemoveDuplicates
- RemovePodsViolatingNodeAffinity
- RemovePodsViolatingTopologySpreadConstraint
- LowNodeUtilization
deschedule:

View File

@@ -77,7 +77,7 @@ func NewDeschedulerCommand(out io.Writer) *cobra.Command {
secureServing.DisableHTTP2 = !s.EnableHTTP2
ctx, done := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
ctx, done := signal.NotifyContext(cmd.Context(), syscall.SIGINT, syscall.SIGTERM)
pathRecorderMux := mux.NewPathRecorderMux("descheduler")
if !s.DisableMetrics {

View File

@@ -109,17 +109,17 @@ See the [resources | Kustomize](https://kubectl.docs.kubernetes.io/references/ku
Run As A Job
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.30.0' | kubectl apply -f -
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/job?ref=v0.30.1' | kubectl apply -f -
```
Run As A CronJob
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.30.0' | kubectl apply -f -
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/cronjob?ref=v0.30.1' | kubectl apply -f -
```
Run As A Deployment
```
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.30.0' | kubectl apply -f -
kustomize build 'github.com/kubernetes-sigs/descheduler/kubernetes/deployment?ref=v0.30.1' | kubectl apply -f -
```
## User Guide

View File

@@ -4,6 +4,7 @@ Starting with descheduler release v0.10.0 container images are available in the
Descheduler Version | Container Image | Architectures |
------------------- |-------------------------------------------------|-------------------------|
v0.30.1 | registry.k8s.io/descheduler/descheduler:v0.30.1 | AMD64<br>ARM64<br>ARMv7 |
v0.30.0 | registry.k8s.io/descheduler/descheduler:v0.30.0 | AMD64<br>ARM64<br>ARMv7 |
v0.29.0 | registry.k8s.io/descheduler/descheduler:v0.29.0 | AMD64<br>ARM64<br>ARMv7 |
v0.28.1 | registry.k8s.io/descheduler/descheduler:v0.28.1 | AMD64<br>ARM64<br>ARMv7 |

View File

@@ -16,7 +16,7 @@ spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: registry.k8s.io/descheduler/descheduler:v0.30.0
image: registry.k8s.io/descheduler/descheduler:v0.30.1
volumeMounts:
- mountPath: /policy-dir
name: policy-volume

View File

@@ -19,7 +19,7 @@ spec:
serviceAccountName: descheduler-sa
containers:
- name: descheduler
image: registry.k8s.io/descheduler/descheduler:v0.30.0
image: registry.k8s.io/descheduler/descheduler:v0.30.1
imagePullPolicy: IfNotPresent
command:
- "/bin/descheduler"

View File

@@ -14,7 +14,7 @@ spec:
priorityClassName: system-cluster-critical
containers:
- name: descheduler
image: registry.k8s.io/descheduler/descheduler:v0.30.0
image: registry.k8s.io/descheduler/descheduler:v0.30.1
volumeMounts:
- mountPath: /policy-dir
name: policy-volume

View File

@@ -343,9 +343,30 @@ func podMatchesInterPodAntiAffinity(nodeIndexer podutil.GetPodsAssignedToNodeFun
if err != nil {
return false, fmt.Errorf("error listing all pods: %v", err)
}
assignedPodsInNamespace := podutil.GroupByNamespace(podsOnNode)
podsInANamespace := podutil.GroupByNamespace(podsOnNode)
nodeMap := utils.CreateNodeMap([]*v1.Node{node})
for _, term := range utils.GetPodAntiAffinityTerms(pod.Spec.Affinity.PodAntiAffinity) {
namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false, err
}
return utils.CheckPodsWithAntiAffinityExist(pod, podsInANamespace, nodeMap), nil
for namespace := range namespaces {
for _, assignedPod := range assignedPodsInNamespace[namespace] {
if assignedPod.Name == pod.Name || !utils.PodMatchesTermsNamespaceAndSelector(assignedPod, namespaces, selector) {
klog.V(4).InfoS("Pod doesn't match inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod))
continue
}
if _, ok := node.Labels[term.TopologyKey]; ok {
klog.V(1).InfoS("Pod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod))
return true, nil
}
}
}
}
return false, nil
}

View File

@@ -759,6 +759,9 @@ func TestNodeFit(t *testing.T) {
"region": "main-region",
}
})
nodeNolabel := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, nil)
tests := []struct {
description string
pod *v1.Pod
@@ -767,7 +770,7 @@ func TestNodeFit(t *testing.T) {
err error
}{
{
description: "insufficient cpu",
description: "Insufficient cpu",
pod: test.BuildTestPod("p1", 10000, 2*1000*1000*1000, "", nil),
node: node,
podsOnNode: []*v1.Pod{
@@ -776,7 +779,7 @@ func TestNodeFit(t *testing.T) {
err: errors.New("insufficient cpu"),
},
{
description: "insufficient pod num",
description: "Insufficient pod num",
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, "", nil),
node: node,
podsOnNode: []*v1.Pod{
@@ -786,7 +789,7 @@ func TestNodeFit(t *testing.T) {
err: errors.New("insufficient pods"),
},
{
description: "matches inter-pod anti-affinity rule of pod on node",
description: "Pod matches inter-pod anti-affinity rule of other pod on node",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
@@ -795,11 +798,36 @@ func TestNodeFit(t *testing.T) {
err: errors.New("pod matches inter-pod anti-affinity rule of other pod on node"),
},
{
description: "pod fits on node",
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because pod and other pod is not same namespace",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, func(pod *v1.Pod) {
pod.Namespace = "test"
}), "foo", "bar"),
},
},
{
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because other pod not match labels of pod",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo1", "bar1"),
},
},
{
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because node have no topologyKey",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, "node1", nil), "foo", "bar"),
node: nodeNolabel,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo", "bar"),
},
},
{
description: "Pod fits on node",
pod: test.BuildTestPod("p1", 1000, 1000, "", func(pod *v1.Pod) {}),
node: node,
podsOnNode: []*v1.Pod{},
err: nil,
},
}

View File

@@ -24,10 +24,37 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/component-helpers/scheduling/corev1"
"k8s.io/klog/v2"
)
// GetNamespacesFromPodAffinityTerm builds the set of namespace names that
// podAffinityTerm applies to. When the term lists no namespaces, the set
// holds only the namespace of the given pod; otherwise it holds exactly the
// namespaces listed in the term.
func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
	if termNamespaces := podAffinityTerm.Namespaces; len(termNamespaces) != 0 {
		return sets.New[string](termNamespaces...)
	}
	// No explicit namespaces on the term: fall back to the pod's own namespace.
	return sets.New[string](pod.Namespace)
}
// PodMatchesTermsNamespaceAndSelector reports whether pod's namespace is a
// member of namespaces and pod's labels satisfy selector. The selector is
// only evaluated when the namespace check passes.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool {
	return namespaces.Has(pod.Namespace) && selector.Matches(labels.Set(pod.Labels))
}
// The following code has been copied from predicates package to avoid the
// huge vendoring issues, mostly copied from
// k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/
@@ -309,42 +336,52 @@ func CreateNodeMap(nodes []*v1.Node) map[string]*v1.Node {
return m
}
// CheckPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
func CheckPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
affinity := pod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := getNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}
for namespace := range namespaces {
for _, existingPod := range pods[namespace] {
if existingPod.Name != pod.Name && podMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
node, ok := nodeMap[pod.Spec.NodeName]
if !ok {
continue
}
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
if !ok {
continue
}
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
klog.V(1).InfoS("Found Pods matching PodAntiAffinity", "pod with anti-affinity", klog.KObj(pod))
return true
}
}
// CheckPodsWithAntiAffinityExist reports whether any pod in assignedPods
// conflicts with one of candidatePod's pod anti-affinity terms.
//
// assignedPods is indexed by namespace. nodeMap is indexed by node name and
// must contain the node named in candidatePod.Spec.NodeName; if it does not,
// a warning is logged and false is returned. False is also returned when
// candidatePod declares no pod anti-affinity, or when a term's label selector
// cannot be converted (the error is logged).
//
// For each term, every assigned pod in the term's namespaces that is not the
// candidate itself (compared by name) and that matches the term's selector is
// checked: the result is true as soon as hasSameLabelValue accepts the
// candidate's node and that pod's node for the term's TopologyKey. Assigned
// pods whose node is missing from nodeMap are skipped.
func CheckPodsWithAntiAffinityExist(candidatePod *v1.Pod, assignedPods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
	candidateNode, found := nodeMap[candidatePod.Spec.NodeName]
	if !found {
		klog.Warningf("CandidatePod %s does not exist in nodeMap", klog.KObj(candidatePod))
		return false
	}

	podAffinity := candidatePod.Spec.Affinity
	if podAffinity == nil || podAffinity.PodAntiAffinity == nil {
		return false
	}

	antiAffinityTerms := GetPodAntiAffinityTerms(podAffinity.PodAntiAffinity)
	for i := range antiAffinityTerms {
		currentTerm := &antiAffinityTerms[i]
		termNamespaces := GetNamespacesFromPodAffinityTerm(candidatePod, currentTerm)
		termSelector, err := metav1.LabelSelectorAsSelector(currentTerm.LabelSelector)
		if err != nil {
			klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
			return false
		}

		for ns := range termNamespaces {
			for _, other := range assignedPods[ns] {
				// Skip the candidate itself and any pod the term does not select.
				isCandidate := other.Name == candidatePod.Name
				if isCandidate || !PodMatchesTermsNamespaceAndSelector(other, termNamespaces, termSelector) {
					klog.V(4).InfoS("CandidatePod doesn't matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(candidatePod), "assignedPod", klog.KObj(other))
					continue
				}

				otherNode, known := nodeMap[other.Spec.NodeName]
				if !known {
					continue
				}

				if !hasSameLabelValue(candidateNode, otherNode, currentTerm.TopologyKey) {
					continue
				}
				klog.V(1).InfoS("CandidatePod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(candidatePod), "assignedPod", klog.KObj(other))
				return true
			}
		}
	}

	return false
}
// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
// GetPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func GetPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
if podAntiAffinity != nil {
if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution

View File

@@ -4,42 +4,13 @@ import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"sigs.k8s.io/descheduler/pkg/api"
)
const SystemCriticalPriority = 2 * int32(1000000000)
// getNamespacesFromPodAffinityTerm collects the namespace names that
// podAffinityTerm refers to. A term with an empty namespace list yields a
// set holding only the given pod's namespace.
func getNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
	if len(podAffinityTerm.Namespaces) == 0 {
		return sets.New[string](pod.Namespace)
	}
	return sets.New[string](podAffinityTerm.Namespaces...)
}
// podMatchesTermsNamespaceAndSelector reports whether pod lives in one of the
// given namespaces and carries labels accepted by selector.
func podMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool {
	inNamespace := namespaces.Has(pod.Namespace)
	if !inNamespace {
		return false
	}
	return selector.Matches(labels.Set(pod.Labels))
}
// GetPriorityFromPriorityClass gets priority from the given priority class.
// If no priority class is provided, it will return SystemCriticalPriority by default.
func GetPriorityFromPriorityClass(ctx context.Context, client clientset.Interface, name string) (int32, error) {