1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 13:29:11 +01:00

[nodeutilization]: actual usage client through kubernetes metrics

This commit is contained in:
Jan Chaloupka
2024-11-07 16:23:16 +01:00
parent c86416612e
commit 6567f01e86
25 changed files with 1643 additions and 134 deletions

View File

@@ -8,6 +8,7 @@ import (
clientset "k8s.io/client-go/kubernetes"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
)
@@ -18,6 +19,7 @@ type HandleImpl struct {
SharedInformerFactoryImpl informers.SharedInformerFactory
EvictorFilterImpl frameworktypes.EvictorPlugin
PodEvictorImpl *evictions.PodEvictor
MetricsCollectorImpl *metricscollector.MetricsCollector
}
var _ frameworktypes.Handle = &HandleImpl{}
@@ -26,6 +28,10 @@ func (hi *HandleImpl) ClientSet() clientset.Interface {
return hi.ClientsetImpl
}
func (hi *HandleImpl) MetricsCollector() *metricscollector.MetricsCollector {
return hi.MetricsCollectorImpl
}
func (hi *HandleImpl) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc {
return hi.GetPodsAssignedToNodeFuncImpl
}

View File

@@ -24,6 +24,7 @@ import (
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
@@ -88,6 +89,16 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
resourceNames := getResourceNames(lowNodeUtilizationArgsArgs.Thresholds)
var usageClient usageClient
if lowNodeUtilizationArgsArgs.MetricsUtilization.MetricsServer {
if handle.MetricsCollector() == nil {
return nil, fmt.Errorf("metrics client not initialized")
}
usageClient = newActualUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc(), handle.MetricsCollector())
} else {
usageClient = newRequestedUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc())
}
return &LowNodeUtilization{
handle: handle,
args: lowNodeUtilizationArgsArgs,
@@ -95,7 +106,7 @@ func NewLowNodeUtilization(args runtime.Object, handle frameworktypes.Handle) (f
overutilizationCriteria: overutilizationCriteria,
resourceNames: resourceNames,
podFilter: podFilter,
usageClient: newRequestedUsageClient(resourceNames, handle.GetPodsAssignedToNodeFunc()),
usageClient: usageClient,
}, nil
}

View File

@@ -21,19 +21,23 @@ import (
"fmt"
"testing"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
frameworktesting "sigs.k8s.io/descheduler/pkg/framework/testing"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
v1 "k8s.io/api/core/v1"
policy "k8s.io/api/policy/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/informers"
"k8s.io/client-go/kubernetes/fake"
core "k8s.io/client-go/testing"
"k8s.io/metrics/pkg/apis/metrics/v1beta1"
fakemetricsclient "k8s.io/metrics/pkg/client/clientset/versioned/fake"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
frameworktesting "sigs.k8s.io/descheduler/pkg/framework/testing"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
"sigs.k8s.io/descheduler/pkg/utils"
"sigs.k8s.io/descheduler/test"
)
@@ -48,14 +52,17 @@ func TestLowNodeUtilization(t *testing.T) {
notMatchingNodeSelectorValue := "east"
testCases := []struct {
name string
useDeviationThresholds bool
thresholds, targetThresholds api.ResourceThresholds
nodes []*v1.Node
pods []*v1.Pod
expectedPodsEvicted uint
evictedPods []string
evictableNamespaces *api.Namespaces
name string
useDeviationThresholds bool
thresholds, targetThresholds api.ResourceThresholds
nodes []*v1.Node
pods []*v1.Pod
nodemetricses []*v1beta1.NodeMetrics
podmetricses []*v1beta1.PodMetrics
expectedPodsEvicted uint
expectedPodsWithMetricsEvicted uint
evictedPods []string
evictableNamespaces *api.Namespaces
}{
{
name: "no evictable pods",
@@ -103,7 +110,20 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 0,
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 2401, 1714978816),
test.BuildNodeMetrics(n2NodeName, 401, 1714978816),
test.BuildNodeMetrics(n3NodeName, 10, 1714978816),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 0,
expectedPodsWithMetricsEvicted: 0,
},
{
name: "without priorities",
@@ -153,7 +173,20 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 4,
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 4,
expectedPodsWithMetricsEvicted: 4,
},
{
name: "without priorities, but excluding namespaces",
@@ -218,12 +251,25 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
evictableNamespaces: &api.Namespaces{
Exclude: []string{
"namespace1",
},
},
expectedPodsEvicted: 0,
expectedPodsEvicted: 0,
expectedPodsWithMetricsEvicted: 0,
},
{
name: "without priorities, but include only default namespace",
@@ -283,12 +329,25 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
evictableNamespaces: &api.Namespaces{
Include: []string{
"default",
},
},
expectedPodsEvicted: 2,
expectedPodsEvicted: 2,
expectedPodsWithMetricsEvicted: 2,
},
{
name: "without priorities stop when cpu capacity is depleted",
@@ -306,14 +365,14 @@ func TestLowNodeUtilization(t *testing.T) {
test.BuildTestNode(n3NodeName, 4000, 3000, 10, test.SetNodeUnschedulable),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 300, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 300, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 300, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 300, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 300, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p4", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p5", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p6", 400, 300, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p7", 400, 300, n1NodeName, func(pod *v1.Pod) {
test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
@@ -330,17 +389,29 @@ func TestLowNodeUtilization(t *testing.T) {
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p8", 400, 300, n1NodeName, func(pod *v1.Pod) {
test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
test.SetNormalOwnerRef(pod)
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
test.BuildTestPod("p9", 400, 2100, n2NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
// 4 pods available for eviction based on v1.ResourcePods, only 3 pods can be evicted before cpu is depleted
expectedPodsEvicted: 3,
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 0, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 4,
expectedPodsWithMetricsEvicted: 4,
},
{
name: "with priorities",
@@ -410,7 +481,20 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 4,
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 4,
expectedPodsWithMetricsEvicted: 4,
},
{
name: "without priorities evicting best-effort pods only",
@@ -478,8 +562,21 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 4,
evictedPods: []string{"p1", "p2", "p4", "p5"},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 4,
expectedPodsWithMetricsEvicted: 4,
evictedPods: []string{"p1", "p2", "p4", "p5"},
},
{
name: "with extended resource",
@@ -558,8 +655,21 @@ func TestLowNodeUtilization(t *testing.T) {
test.SetPodExtendedResourceRequest(pod, extendedResource, 1)
}),
},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
// 4 pods available for eviction based on v1.ResourcePods, only 3 pods can be evicted before extended resource is depleted
expectedPodsEvicted: 3,
expectedPodsEvicted: 3,
expectedPodsWithMetricsEvicted: 0,
},
{
name: "with extended resource in some of nodes",
@@ -586,8 +696,21 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef),
},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
// 0 pods available for eviction because there's no enough extended resource in node2
expectedPodsEvicted: 0,
expectedPodsEvicted: 0,
expectedPodsWithMetricsEvicted: 0,
},
{
name: "without priorities, but only other node is unschedulable",
@@ -636,7 +759,19 @@ func TestLowNodeUtilization(t *testing.T) {
pod.Spec.Priority = &priority
}),
},
expectedPodsEvicted: 0,
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 0,
expectedPodsWithMetricsEvicted: 0,
},
{
name: "without priorities, but only other node doesn't match pod node selector for p4 and p5",
@@ -701,7 +836,17 @@ func TestLowNodeUtilization(t *testing.T) {
pod.Spec.Priority = &priority
}),
},
expectedPodsEvicted: 3,
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
},
expectedPodsEvicted: 3,
expectedPodsWithMetricsEvicted: 3,
},
{
name: "without priorities, but only other node doesn't match pod node affinity for p4 and p5",
@@ -795,7 +940,17 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 3,
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
},
expectedPodsEvicted: 3,
expectedPodsWithMetricsEvicted: 3,
},
{
name: "deviation thresholds",
@@ -847,71 +1002,219 @@ func TestLowNodeUtilization(t *testing.T) {
}),
test.BuildTestPod("p9", 400, 0, n2NodeName, test.SetRSOwnerRef),
},
expectedPodsEvicted: 2,
evictedPods: []string{},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
test.BuildNodeMetrics(n3NodeName, 11, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 401, 0),
test.BuildPodMetrics("p2", 401, 0),
test.BuildPodMetrics("p3", 401, 0),
test.BuildPodMetrics("p4", 401, 0),
test.BuildPodMetrics("p5", 401, 0),
},
expectedPodsEvicted: 2,
expectedPodsWithMetricsEvicted: 2,
evictedPods: []string{},
},
{
name: "without priorities different evictions for requested and actual resources",
thresholds: api.ResourceThresholds{
v1.ResourceCPU: 30,
v1.ResourcePods: 30,
},
targetThresholds: api.ResourceThresholds{
v1.ResourceCPU: 50,
v1.ResourcePods: 50,
},
nodes: []*v1.Node{
test.BuildTestNode(n1NodeName, 4000, 3000, 9, nil),
test.BuildTestNode(n2NodeName, 4000, 3000, 10, func(node *v1.Node) {
node.ObjectMeta.Labels = map[string]string{
nodeSelectorKey: notMatchingNodeSelectorValue,
}
}),
},
pods: []*v1.Pod{
test.BuildTestPod("p1", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p2", 400, 0, n1NodeName, test.SetRSOwnerRef),
test.BuildTestPod("p3", 400, 0, n1NodeName, test.SetRSOwnerRef),
// These won't be evicted.
test.BuildTestPod("p4", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with affinity to run in the "west" datacenter upon scheduling
test.SetNormalOwnerRef(pod)
pod.Spec.Affinity = &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeSelectorKey,
Operator: "In",
Values: []string{nodeSelectorValue},
},
},
},
},
},
},
}
}),
test.BuildTestPod("p5", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with affinity to run in the "west" datacenter upon scheduling
test.SetNormalOwnerRef(pod)
pod.Spec.Affinity = &v1.Affinity{
NodeAffinity: &v1.NodeAffinity{
RequiredDuringSchedulingIgnoredDuringExecution: &v1.NodeSelector{
NodeSelectorTerms: []v1.NodeSelectorTerm{
{
MatchExpressions: []v1.NodeSelectorRequirement{
{
Key: nodeSelectorKey,
Operator: "In",
Values: []string{nodeSelectorValue},
},
},
},
},
},
},
}
}),
test.BuildTestPod("p6", 400, 0, n1NodeName, test.SetDSOwnerRef),
test.BuildTestPod("p7", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A pod with local storage.
test.SetNormalOwnerRef(pod)
pod.Spec.Volumes = []v1.Volume{
{
Name: "sample",
VolumeSource: v1.VolumeSource{
HostPath: &v1.HostPathVolumeSource{Path: "somePath"},
EmptyDir: &v1.EmptyDirVolumeSource{
SizeLimit: resource.NewQuantity(int64(10), resource.BinarySI),
},
},
},
}
// A Mirror Pod.
pod.Annotations = test.GetMirrorPodAnnotation()
}),
test.BuildTestPod("p8", 400, 0, n1NodeName, func(pod *v1.Pod) {
// A Critical Pod.
test.SetNormalOwnerRef(pod)
pod.Namespace = "kube-system"
priority := utils.SystemCriticalPriority
pod.Spec.Priority = &priority
}),
test.BuildTestPod("p9", 0, 0, n2NodeName, test.SetRSOwnerRef),
},
nodemetricses: []*v1beta1.NodeMetrics{
test.BuildNodeMetrics(n1NodeName, 3201, 0),
test.BuildNodeMetrics(n2NodeName, 401, 0),
},
podmetricses: []*v1beta1.PodMetrics{
test.BuildPodMetrics("p1", 801, 0),
test.BuildPodMetrics("p2", 801, 0),
test.BuildPodMetrics("p3", 801, 0),
},
expectedPodsEvicted: 3,
expectedPodsWithMetricsEvicted: 2,
},
}
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
testFnc := func(metricsEnabled bool, expectedPodsEvicted uint) func(t *testing.T) {
return func(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
var objs []runtime.Object
for _, node := range tc.nodes {
objs = append(objs, node)
}
for _, pod := range tc.pods {
objs = append(objs, pod)
}
fakeClient := fake.NewSimpleClientset(objs...)
var objs []runtime.Object
for _, node := range tc.nodes {
objs = append(objs, node)
}
for _, pod := range tc.pods {
objs = append(objs, pod)
}
podsForEviction := make(map[string]struct{})
for _, pod := range tc.evictedPods {
podsForEviction[pod] = struct{}{}
}
fakeClient := fake.NewSimpleClientset(objs...)
evictionFailed := false
if len(tc.evictedPods) > 0 {
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.CreateAction)
obj := getAction.GetObject()
if eviction, ok := obj.(*policy.Eviction); ok {
if _, exists := podsForEviction[eviction.Name]; exists {
return true, obj, nil
}
evictionFailed = true
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
var collector *metricscollector.MetricsCollector
if metricsEnabled {
metricsClientset := fakemetricsclient.NewSimpleClientset()
for _, nodemetrics := range tc.nodemetricses {
metricsClientset.Tracker().Create(nodesgvr, nodemetrics, "")
}
for _, podmetrics := range tc.podmetricses {
metricsClientset.Tracker().Create(podsgvr, podmetrics, podmetrics.Namespace)
}
return true, obj, nil
})
}
handle, podEvictor, err := frameworktesting.InitFrameworkHandle(ctx, fakeClient, nil, defaultevictor.DefaultEvictorArgs{NodeFit: true}, nil)
if err != nil {
t.Fatalf("Unable to initialize a framework handle: %v", err)
}
sharedInformerFactory := informers.NewSharedInformerFactory(fakeClient, 0)
nodeLister := sharedInformerFactory.Core().V1().Nodes().Lister()
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
plugin, err := NewLowNodeUtilization(&LowNodeUtilizationArgs{
Thresholds: tc.thresholds,
TargetThresholds: tc.targetThresholds,
UseDeviationThresholds: tc.useDeviationThresholds,
EvictableNamespaces: tc.evictableNamespaces,
},
handle)
if err != nil {
t.Fatalf("Unable to initialize the plugin: %v", err)
}
plugin.(frameworktypes.BalancePlugin).Balance(ctx, tc.nodes)
collector = metricscollector.NewMetricsCollector(nodeLister, metricsClientset, labels.Everything())
err := collector.Collect(ctx)
if err != nil {
t.Fatalf("unable to collect metrics: %v", err)
}
}
podsEvicted := podEvictor.TotalEvicted()
if tc.expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %v pods to be evicted but %v got evicted", tc.expectedPodsEvicted, podsEvicted)
podsForEviction := make(map[string]struct{})
for _, pod := range tc.evictedPods {
podsForEviction[pod] = struct{}{}
}
evictionFailed := false
if len(tc.evictedPods) > 0 {
fakeClient.Fake.AddReactor("create", "pods", func(action core.Action) (bool, runtime.Object, error) {
getAction := action.(core.CreateAction)
obj := getAction.GetObject()
if eviction, ok := obj.(*policy.Eviction); ok {
if _, exists := podsForEviction[eviction.Name]; exists {
return true, obj, nil
}
evictionFailed = true
return true, nil, fmt.Errorf("pod %q was unexpectedly evicted", eviction.Name)
}
return true, obj, nil
})
}
handle, podEvictor, err := frameworktesting.InitFrameworkHandle(ctx, fakeClient, nil, defaultevictor.DefaultEvictorArgs{NodeFit: true}, nil)
if err != nil {
t.Fatalf("Unable to initialize a framework handle: %v", err)
}
handle.MetricsCollectorImpl = collector
plugin, err := NewLowNodeUtilization(&LowNodeUtilizationArgs{
Thresholds: tc.thresholds,
TargetThresholds: tc.targetThresholds,
UseDeviationThresholds: tc.useDeviationThresholds,
EvictableNamespaces: tc.evictableNamespaces,
MetricsUtilization: MetricsUtilization{
MetricsServer: metricsEnabled,
},
},
handle)
if err != nil {
t.Fatalf("Unable to initialize the plugin: %v", err)
}
plugin.(frameworktypes.BalancePlugin).Balance(ctx, tc.nodes)
podsEvicted := podEvictor.TotalEvicted()
if expectedPodsEvicted != podsEvicted {
t.Errorf("Expected %v pods to be evicted but %v got evicted", expectedPodsEvicted, podsEvicted)
}
if evictionFailed {
t.Errorf("Pod evictions failed unexpectedly")
}
}
if evictionFailed {
t.Errorf("Pod evictions failed unexpectedly")
}
})
}
t.Run(tc.name, testFnc(false, tc.expectedPodsEvicted))
t.Run(tc.name+" with metrics enabled", testFnc(true, tc.expectedPodsWithMetricsEvicted))
}
}

View File

@@ -28,6 +28,7 @@ type LowNodeUtilizationArgs struct {
Thresholds api.ResourceThresholds `json:"thresholds"`
TargetThresholds api.ResourceThresholds `json:"targetThresholds"`
NumberOfNodes int `json:"numberOfNodes,omitempty"`
MetricsUtilization MetricsUtilization `json:"metricsUtilization,omitempty"`
// Naming this one differently since namespaces are still
// considered while considering resources used by pods
@@ -41,10 +42,19 @@ type LowNodeUtilizationArgs struct {
type HighNodeUtilizationArgs struct {
metav1.TypeMeta `json:",inline"`
Thresholds api.ResourceThresholds `json:"thresholds"`
NumberOfNodes int `json:"numberOfNodes,omitempty"`
Thresholds api.ResourceThresholds `json:"thresholds"`
NumberOfNodes int `json:"numberOfNodes,omitempty"`
MetricsUtilization MetricsUtilization `json:"metricsUtilization,omitempty"`
// Naming this one differently since namespaces are still
// considered while considering resources used by pods
// but then filtered out before eviction
EvictableNamespaces *api.Namespaces `json:"evictableNamespaces,omitempty"`
}
// MetricsUtilization allow to consume actual resource utilization from metrics
type MetricsUtilization struct {
// metricsServer enables metrics from a kubernetes metrics server.
// Please see https://kubernetes-sigs.github.io/metrics-server/ for more.
MetricsServer bool `json:"metricsServer,omitempty"`
}

View File

@@ -17,12 +17,16 @@ limitations under the License.
package nodeutilization
import (
"context"
"fmt"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/klog/v2"
utilptr "k8s.io/utils/ptr"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
@@ -100,3 +104,98 @@ func (s *requestedUsageClient) sync(nodes []*v1.Node) error {
return nil
}
type actualUsageClient struct {
resourceNames []v1.ResourceName
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc
metricsCollector *metricscollector.MetricsCollector
_pods map[string][]*v1.Pod
_nodeUtilization map[string]map[v1.ResourceName]*resource.Quantity
}
var _ usageClient = &actualUsageClient{}
func newActualUsageClient(
resourceNames []v1.ResourceName,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
metricsCollector *metricscollector.MetricsCollector,
) *actualUsageClient {
return &actualUsageClient{
resourceNames: resourceNames,
getPodsAssignedToNode: getPodsAssignedToNode,
metricsCollector: metricsCollector,
}
}
func (client *actualUsageClient) nodeUtilization(node string) map[v1.ResourceName]*resource.Quantity {
return client._nodeUtilization[node]
}
func (client *actualUsageClient) pods(node string) []*v1.Pod {
return client._pods[node]
}
func (client *actualUsageClient) podUsage(pod *v1.Pod) (map[v1.ResourceName]*resource.Quantity, error) {
// It's not efficient to keep track of all pods in a cluster when only their fractions is evicted.
// Thus, take the current pod metrics without computing any softening (like e.g. EWMA).
podMetrics, err := client.metricsCollector.MetricsClient().MetricsV1beta1().PodMetricses(pod.Namespace).Get(context.TODO(), pod.Name, metav1.GetOptions{})
if err != nil {
return nil, fmt.Errorf("unable to get podmetrics for %q/%q: %v", pod.Namespace, pod.Name, err)
}
totalUsage := make(map[v1.ResourceName]*resource.Quantity)
for _, container := range podMetrics.Containers {
for _, resourceName := range client.resourceNames {
if resourceName == v1.ResourcePods {
continue
}
if _, exists := container.Usage[resourceName]; !exists {
return nil, fmt.Errorf("pod %v/%v: container %q is missing %q resource", pod.Namespace, pod.Name, container.Name, resourceName)
}
if totalUsage[resourceName] == nil {
totalUsage[resourceName] = utilptr.To[resource.Quantity](container.Usage[resourceName].DeepCopy())
} else {
totalUsage[resourceName].Add(container.Usage[resourceName])
}
}
}
return totalUsage, nil
}
func (client *actualUsageClient) sync(nodes []*v1.Node) error {
client._nodeUtilization = make(map[string]map[v1.ResourceName]*resource.Quantity)
client._pods = make(map[string][]*v1.Pod)
nodesUsage, err := client.metricsCollector.AllNodesUsage()
if err != nil {
return err
}
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(node.Name, client.getPodsAssignedToNode, nil)
if err != nil {
klog.V(2).InfoS("Node will not be processed, error accessing its pods", "node", klog.KObj(node), "err", err)
return fmt.Errorf("error accessing %q node's pods: %v", node.Name, err)
}
nodeUsage, ok := nodesUsage[node.Name]
if !ok {
return fmt.Errorf("unable to find node %q in the collected metrics", node.Name)
}
nodeUsage[v1.ResourcePods] = resource.NewQuantity(int64(len(pods)), resource.DecimalSI)
for _, resourceName := range client.resourceNames {
if _, exists := nodeUsage[resourceName]; !exists {
return fmt.Errorf("unable to find %q resource for collected %q node metric", resourceName, node.Name)
}
}
// store the snapshot of pods from the same (or the closest) node utilization computation
client._pods[node.Name] = pods
client._nodeUtilization[node.Name] = nodeUsage
}
return nil
}

View File

@@ -0,0 +1,139 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"context"
"fmt"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/informers"
fakeclientset "k8s.io/client-go/kubernetes/fake"
"k8s.io/metrics/pkg/apis/metrics/v1beta1"
fakemetricsclient "k8s.io/metrics/pkg/client/clientset/versioned/fake"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/test"
)
var (
nodesgvr = schema.GroupVersionResource{Group: "metrics.k8s.io", Version: "v1beta1", Resource: "nodes"}
podsgvr = schema.GroupVersionResource{Group: "metrics.k8s.io", Version: "v1beta1", Resource: "pods"}
)
func updateMetricsAndCheckNodeUtilization(
t *testing.T,
ctx context.Context,
newValue, expectedValue int64,
metricsClientset *fakemetricsclient.Clientset,
collector *metricscollector.MetricsCollector,
usageClient usageClient,
nodes []*v1.Node,
nodeName string,
nodemetrics *v1beta1.NodeMetrics,
) {
t.Logf("Set current node cpu usage to %v", newValue)
nodemetrics.Usage[v1.ResourceCPU] = *resource.NewMilliQuantity(newValue, resource.DecimalSI)
metricsClientset.Tracker().Update(nodesgvr, nodemetrics, "")
err := collector.Collect(ctx)
if err != nil {
t.Fatalf("failed to capture metrics: %v", err)
}
err = usageClient.sync(nodes)
if err != nil {
t.Fatalf("failed to capture a snapshot: %v", err)
}
nodeUtilization := usageClient.nodeUtilization(nodeName)
t.Logf("current node cpu usage: %v\n", nodeUtilization[v1.ResourceCPU].MilliValue())
if nodeUtilization[v1.ResourceCPU].MilliValue() != expectedValue {
t.Fatalf("cpu node usage expected to be %v, got %v instead", expectedValue, nodeUtilization[v1.ResourceCPU].MilliValue())
}
pods := usageClient.pods(nodeName)
fmt.Printf("pods: %#v\n", pods)
if len(pods) != 2 {
t.Fatalf("expected 2 pods for node %v, got %v instead", nodeName, len(pods))
}
}
func TestActualUsageClient(t *testing.T) {
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
n3 := test.BuildTestNode("n3", 2000, 3000, 10, nil)
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, nil)
p21 := test.BuildTestPod("p21", 400, 0, n2.Name, nil)
p22 := test.BuildTestPod("p22", 400, 0, n2.Name, nil)
p3 := test.BuildTestPod("p3", 400, 0, n3.Name, nil)
nodes := []*v1.Node{n1, n2, n3}
n1metrics := test.BuildNodeMetrics("n1", 400, 1714978816)
n2metrics := test.BuildNodeMetrics("n2", 1400, 1714978816)
n3metrics := test.BuildNodeMetrics("n3", 300, 1714978816)
clientset := fakeclientset.NewSimpleClientset(n1, n2, n3, p1, p21, p22, p3)
metricsClientset := fakemetricsclient.NewSimpleClientset()
metricsClientset.Tracker().Create(nodesgvr, n1metrics, "")
metricsClientset.Tracker().Create(nodesgvr, n2metrics, "")
metricsClientset.Tracker().Create(nodesgvr, n3metrics, "")
ctx := context.TODO()
resourceNames := []v1.ResourceName{
v1.ResourceCPU,
v1.ResourceMemory,
}
sharedInformerFactory := informers.NewSharedInformerFactory(clientset, 0)
podInformer := sharedInformerFactory.Core().V1().Pods().Informer()
nodeLister := sharedInformerFactory.Core().V1().Nodes().Lister()
podsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Fatalf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
collector := metricscollector.NewMetricsCollector(nodeLister, metricsClientset, labels.Everything())
usageClient := newActualUsageClient(
resourceNames,
podsAssignedToNode,
collector,
)
updateMetricsAndCheckNodeUtilization(t, ctx,
1400, 1400,
metricsClientset, collector, usageClient, nodes, n2.Name, n2metrics,
)
updateMetricsAndCheckNodeUtilization(t, ctx,
500, 1310,
metricsClientset, collector, usageClient, nodes, n2.Name, n2metrics,
)
updateMetricsAndCheckNodeUtilization(t, ctx,
900, 1269,
metricsClientset, collector, usageClient, nodes, n2.Name, n2metrics,
)
}

View File

@@ -37,6 +37,7 @@ func (in *HighNodeUtilizationArgs) DeepCopyInto(out *HighNodeUtilizationArgs) {
(*out)[key] = val
}
}
out.MetricsUtilization = in.MetricsUtilization
if in.EvictableNamespaces != nil {
in, out := &in.EvictableNamespaces, &out.EvictableNamespaces
*out = new(api.Namespaces)
@@ -81,6 +82,7 @@ func (in *LowNodeUtilizationArgs) DeepCopyInto(out *LowNodeUtilizationArgs) {
(*out)[key] = val
}
}
out.MetricsUtilization = in.MetricsUtilization
if in.EvictableNamespaces != nil {
in, out := &in.EvictableNamespaces, &out.EvictableNamespaces
*out = new(api.Namespaces)

View File

@@ -20,21 +20,22 @@ import (
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/trace"
"sigs.k8s.io/descheduler/metrics"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/framework/pluginregistry"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
"sigs.k8s.io/descheduler/pkg/tracing"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/util/errors"
"k8s.io/apimachinery/pkg/util/sets"
"k8s.io/client-go/informers"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/klog/v2"
"sigs.k8s.io/descheduler/metrics"
"sigs.k8s.io/descheduler/pkg/api"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/framework/pluginregistry"
frameworktypes "sigs.k8s.io/descheduler/pkg/framework/types"
"sigs.k8s.io/descheduler/pkg/tracing"
)
// evictorImpl implements the Evictor interface so plugins
@@ -67,6 +68,7 @@ func (ei *evictorImpl) Evict(ctx context.Context, pod *v1.Pod, opts evictions.Ev
// handleImpl implements the framework handle which gets passed to plugins
type handleImpl struct {
clientSet clientset.Interface
metricsCollector *metricscollector.MetricsCollector
getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc
sharedInformerFactory informers.SharedInformerFactory
evictor *evictorImpl
@@ -79,6 +81,10 @@ func (hi *handleImpl) ClientSet() clientset.Interface {
return hi.clientSet
}
func (hi *handleImpl) MetricsCollector() *metricscollector.MetricsCollector {
return hi.metricsCollector
}
// GetPodsAssignedToNodeFunc retrieves GetPodsAssignedToNodeFunc implementation
func (hi *handleImpl) GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc {
return hi.getPodsAssignedToNodeFunc
@@ -128,6 +134,7 @@ type handleImplOpts struct {
sharedInformerFactory informers.SharedInformerFactory
getPodsAssignedToNodeFunc podutil.GetPodsAssignedToNodeFunc
podEvictor *evictions.PodEvictor
metricsCollector *metricscollector.MetricsCollector
}
// WithClientSet sets clientSet for the scheduling frameworkImpl.
@@ -155,6 +162,12 @@ func WithGetPodsAssignedToNodeFnc(getPodsAssignedToNodeFunc podutil.GetPodsAssig
}
}
func WithMetricsCollector(metricsCollector *metricscollector.MetricsCollector) Option {
return func(o *handleImplOpts) {
o.metricsCollector = metricsCollector
}
}
func getPluginConfig(pluginName string, pluginConfigs []api.PluginConfig) (*api.PluginConfig, int) {
for idx, pluginConfig := range pluginConfigs {
if pluginConfig.Name == pluginName {
@@ -253,6 +266,7 @@ func NewProfile(config api.DeschedulerProfile, reg pluginregistry.Registry, opts
profileName: config.Name,
podEvictor: hOpts.podEvictor,
},
metricsCollector: hOpts.metricsCollector,
}
pluginNames := append(config.Plugins.Deschedule.Enabled, config.Plugins.Balance.Enabled...)

View File

@@ -24,6 +24,7 @@ import (
clientset "k8s.io/client-go/kubernetes"
"sigs.k8s.io/descheduler/pkg/descheduler/evictions"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
)
@@ -36,6 +37,7 @@ type Handle interface {
Evictor() Evictor
GetPodsAssignedToNodeFunc() podutil.GetPodsAssignedToNodeFunc
SharedInformerFactory() informers.SharedInformerFactory
MetricsCollector() *metricscollector.MetricsCollector
}
// Evictor defines an interface for filtering and evicting pods