1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Merge pull request #1425 from a7i/automated-cherry-pick-of-#1378-#1390-#1412-#1413-#1416-#1395-upstream-release-1.30

Automated cherry pick of #1378: Fix the replicas type for the helm-chart
#1390: allow 'falsey' value in cmdOption
#1412: fix helm's default deschedulerPolicy
#1413: fix TOC location in Readme
#1416: use cmd context instead of using context.Background()
#1395: fix the issue that the pod anti-filtering rules are not
This commit is contained in:
Kubernetes Prow Robot
2024-06-05 02:43:20 -07:00
committed by GitHub
10 changed files with 140 additions and 77 deletions

View File

@@ -2,7 +2,7 @@
![Release Charts](https://github.com/kubernetes-sigs/descheduler/workflows/Release%20Charts/badge.svg)
<p align="left">
Click at the [bullet list icon] at the top left corner of the Readme visualization for the github generated table of contents.
↗️ Click at the [bullet list icon] at the top right corner of the Readme visualization for the github generated table of contents.
</p>
<p align="center">

View File

@@ -1,7 +1,7 @@
Descheduler installed as a {{ .Values.kind }}.
{{- if eq .Values.kind "Deployment" }}
{{- if eq .Values.replicas 1.0}}
{{- if eq (.Values.replicas | int) 1 }}
WARNING: You set replica count as 1 and workload kind as Deployment however leaderElection is not enabled. Consider enabling Leader Election for HA mode.
{{- end}}
{{- if .Values.leaderElection }}

View File

@@ -81,7 +81,11 @@ spec:
args:
- --policy-config-file=/policy-dir/policy.yaml
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key }}{{ if $value }}={{ $value }}{{ end }}
{{- if ne $value nil }}
- {{ printf "--%s=%s" $key (toString $value) }}
{{- else }}
- {{ printf "--%s" $key }}
{{- end }}
{{- end }}
livenessProbe:
{{- toYaml .Values.livenessProbe | nindent 16 }}

View File

@@ -7,7 +7,7 @@ metadata:
labels:
{{- include "descheduler.labels" . | nindent 4 }}
spec:
{{- if gt .Values.replicas 1.0}}
{{- if gt (.Values.replicas | int) 1 }}
{{- if not .Values.leaderElection.enabled }}
{{- fail "You must set leaderElection to use more than 1 replica"}}
{{- end}}
@@ -53,7 +53,11 @@ spec:
- --policy-config-file=/policy-dir/policy.yaml
- --descheduling-interval={{ required "deschedulingInterval required for running as Deployment" .Values.deschedulingInterval }}
{{- range $key, $value := .Values.cmdOptions }}
- {{ printf "--%s" $key }}{{ if $value }}={{ $value }}{{ end }}
{{- if ne $value nil }}
- {{ printf "--%s=%s" $key (toString $value) }}
{{- else }}
- {{ printf "--%s" $key }}
{{- end }}
{{- end }}
{{- include "descheduler.leaderElection" . | nindent 12 }}
ports:

View File

@@ -111,14 +111,13 @@ deschedulerPolicy:
args:
podRestartThreshold: 100
includingInitContainers: true
- name: RemovePodsViolatingNodeTaints
- name: RemovePodsViolatingNodeAffinity
args:
nodeAffinityType:
- requiredDuringSchedulingIgnoredDuringExecution
- requiredDuringSchedulingIgnoredDuringExecution
- name: RemovePodsViolatingNodeTaints
- name: RemovePodsViolatingInterPodAntiAffinity
- name: RemovePodsViolatingTopologySpreadConstraint
args:
includeSoftConstraints: false
- name: LowNodeUtilization
args:
thresholds:
@@ -133,7 +132,6 @@ deschedulerPolicy:
balance:
enabled:
- RemoveDuplicates
- RemovePodsViolatingNodeAffinity
- RemovePodsViolatingTopologySpreadConstraint
- LowNodeUtilization
deschedule:

View File

@@ -77,7 +77,7 @@ func NewDeschedulerCommand(out io.Writer) *cobra.Command {
secureServing.DisableHTTP2 = !s.EnableHTTP2
ctx, done := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
ctx, done := signal.NotifyContext(cmd.Context(), syscall.SIGINT, syscall.SIGTERM)
pathRecorderMux := mux.NewPathRecorderMux("descheduler")
if !s.DisableMetrics {

View File

@@ -343,9 +343,30 @@ func podMatchesInterPodAntiAffinity(nodeIndexer podutil.GetPodsAssignedToNodeFun
if err != nil {
return false, fmt.Errorf("error listing all pods: %v", err)
}
assignedPodsInNamespace := podutil.GroupByNamespace(podsOnNode)
podsInANamespace := podutil.GroupByNamespace(podsOnNode)
nodeMap := utils.CreateNodeMap([]*v1.Node{node})
for _, term := range utils.GetPodAntiAffinityTerms(pod.Spec.Affinity.PodAntiAffinity) {
namespaces := utils.GetNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false, err
}
return utils.CheckPodsWithAntiAffinityExist(pod, podsInANamespace, nodeMap), nil
for namespace := range namespaces {
for _, assignedPod := range assignedPodsInNamespace[namespace] {
if assignedPod.Name == pod.Name || !utils.PodMatchesTermsNamespaceAndSelector(assignedPod, namespaces, selector) {
klog.V(4).InfoS("Pod doesn't match inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod))
continue
}
if _, ok := node.Labels[term.TopologyKey]; ok {
klog.V(1).InfoS("Pod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(pod), "assignedPod", klog.KObj(assignedPod))
return true, nil
}
}
}
}
return false, nil
}

View File

@@ -759,6 +759,9 @@ func TestNodeFit(t *testing.T) {
"region": "main-region",
}
})
nodeNolabel := test.BuildTestNode("node", 64000, 128*1000*1000*1000, 2, nil)
tests := []struct {
description string
pod *v1.Pod
@@ -767,7 +770,7 @@ func TestNodeFit(t *testing.T) {
err error
}{
{
description: "insufficient cpu",
description: "Insufficient cpu",
pod: test.BuildTestPod("p1", 10000, 2*1000*1000*1000, "", nil),
node: node,
podsOnNode: []*v1.Pod{
@@ -776,7 +779,7 @@ func TestNodeFit(t *testing.T) {
err: errors.New("insufficient cpu"),
},
{
description: "insufficient pod num",
description: "Insufficient pod num",
pod: test.BuildTestPod("p1", 1000, 2*1000*1000*1000, "", nil),
node: node,
podsOnNode: []*v1.Pod{
@@ -786,7 +789,7 @@ func TestNodeFit(t *testing.T) {
err: errors.New("insufficient pods"),
},
{
description: "matches inter-pod anti-affinity rule of pod on node",
description: "Pod matches inter-pod anti-affinity rule of other pod on node",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
@@ -795,11 +798,36 @@ func TestNodeFit(t *testing.T) {
err: errors.New("pod matches inter-pod anti-affinity rule of other pod on node"),
},
{
description: "pod fits on node",
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because pod and other pod is not same namespace",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, func(pod *v1.Pod) {
pod.Namespace = "test"
}), "foo", "bar"),
},
},
{
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because other pod not match labels of pod",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, node.Name, nil), "foo", "bar"),
node: node,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo1", "bar1"),
},
},
{
description: "Pod doesn't match inter-pod anti-affinity rule of other pod on node, because node have no topologyKey",
pod: test.PodWithPodAntiAffinity(test.BuildTestPod("p1", 1000, 1000, "node1", nil), "foo", "bar"),
node: nodeNolabel,
podsOnNode: []*v1.Pod{
test.PodWithPodAntiAffinity(test.BuildTestPod("p2", 1000, 1000, node.Name, nil), "foo", "bar"),
},
},
{
description: "Pod fits on node",
pod: test.BuildTestPod("p1", 1000, 1000, "", func(pod *v1.Pod) {}),
node: node,
podsOnNode: []*v1.Pod{},
err: nil,
},
}

View File

@@ -24,10 +24,37 @@ import (
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/component-helpers/scheduling/corev1"
"k8s.io/klog/v2"
)
// GetNamespacesFromPodAffinityTerm returns a set of names
// according to the namespaces indicated in podAffinityTerm.
// If namespaces is empty it considers the given pod's namespace.
func GetNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
names := sets.New[string]()
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
} else {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}
// PodMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func PodMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}
if !selector.Matches(labels.Set(pod.Labels)) {
return false
}
return true
}
// The following code has been copied from predicates package to avoid the
// huge vendoring issues, mostly copied from
// k8s.io/kubernetes/plugin/pkg/scheduler/algorithm/predicates/
@@ -309,42 +336,52 @@ func CreateNodeMap(nodes []*v1.Node) map[string]*v1.Node {
return m
}
// CheckPodsWithAntiAffinityExist checks if there are other pods on the node that the current pod cannot tolerate.
func CheckPodsWithAntiAffinityExist(pod *v1.Pod, pods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
affinity := pod.Spec.Affinity
if affinity != nil && affinity.PodAntiAffinity != nil {
for _, term := range getPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := getNamespacesFromPodAffinityTerm(pod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}
for namespace := range namespaces {
for _, existingPod := range pods[namespace] {
if existingPod.Name != pod.Name && podMatchesTermsNamespaceAndSelector(existingPod, namespaces, selector) {
node, ok := nodeMap[pod.Spec.NodeName]
if !ok {
continue
}
nodeHavingExistingPod, ok := nodeMap[existingPod.Spec.NodeName]
if !ok {
continue
}
if hasSameLabelValue(node, nodeHavingExistingPod, term.TopologyKey) {
klog.V(1).InfoS("Found Pods matching PodAntiAffinity", "pod with anti-affinity", klog.KObj(pod))
return true
}
}
// CheckPodsWithAntiAffinityExist checks if there are other pods on the node that the current candidate pod cannot tolerate.
func CheckPodsWithAntiAffinityExist(candidatePod *v1.Pod, assignedPods map[string][]*v1.Pod, nodeMap map[string]*v1.Node) bool {
nodeHavingCandidatePod, ok := nodeMap[candidatePod.Spec.NodeName]
if !ok {
klog.Warningf("CandidatePod %s does not exist in nodeMap", klog.KObj(candidatePod))
return false
}
affinity := candidatePod.Spec.Affinity
if affinity == nil || affinity.PodAntiAffinity == nil {
return false
}
for _, term := range GetPodAntiAffinityTerms(affinity.PodAntiAffinity) {
namespaces := GetNamespacesFromPodAffinityTerm(candidatePod, &term)
selector, err := metav1.LabelSelectorAsSelector(term.LabelSelector)
if err != nil {
klog.ErrorS(err, "Unable to convert LabelSelector into Selector")
return false
}
for namespace := range namespaces {
for _, assignedPod := range assignedPods[namespace] {
if assignedPod.Name == candidatePod.Name || !PodMatchesTermsNamespaceAndSelector(assignedPod, namespaces, selector) {
klog.V(4).InfoS("CandidatePod doesn't matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(candidatePod), "assignedPod", klog.KObj(assignedPod))
continue
}
nodeHavingAssignedPod, ok := nodeMap[assignedPod.Spec.NodeName]
if !ok {
continue
}
if hasSameLabelValue(nodeHavingCandidatePod, nodeHavingAssignedPod, term.TopologyKey) {
klog.V(1).InfoS("CandidatePod matches inter-pod anti-affinity rule of assigned pod on node", "candidatePod", klog.KObj(candidatePod), "assignedPod", klog.KObj(assignedPod))
return true
}
}
}
}
return false
}
// getPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func getPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
// GetPodAntiAffinityTerms gets the antiaffinity terms for the given pod.
func GetPodAntiAffinityTerms(podAntiAffinity *v1.PodAntiAffinity) (terms []v1.PodAffinityTerm) {
if podAntiAffinity != nil {
if len(podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution) != 0 {
terms = podAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution

View File

@@ -4,42 +4,13 @@ import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/util/sets"
clientset "k8s.io/client-go/kubernetes"
"sigs.k8s.io/descheduler/pkg/api"
)
const SystemCriticalPriority = 2 * int32(1000000000)
// getNamespacesFromPodAffinityTerm returns a set of names
// according to the namespaces indicated in podAffinityTerm.
// If namespaces is empty it considers the given pod's namespace.
func getNamespacesFromPodAffinityTerm(pod *v1.Pod, podAffinityTerm *v1.PodAffinityTerm) sets.Set[string] {
names := sets.New[string]()
if len(podAffinityTerm.Namespaces) == 0 {
names.Insert(pod.Namespace)
} else {
names.Insert(podAffinityTerm.Namespaces...)
}
return names
}
// podMatchesTermsNamespaceAndSelector returns true if the given <pod>
// matches the namespace and selector defined by <affinityPod>`s <term>.
func podMatchesTermsNamespaceAndSelector(pod *v1.Pod, namespaces sets.Set[string], selector labels.Selector) bool {
if !namespaces.Has(pod.Namespace) {
return false
}
if !selector.Matches(labels.Set(pod.Labels)) {
return false
}
return true
}
// GetPriorityFromPriorityClass gets priority from the given priority class.
// If no priority class is provided, it will return SystemCriticalPriority by default.
func GetPriorityFromPriorityClass(ctx context.Context, client clientset.Interface, name string) (int32, error) {