1
0
mirror of https://github.com/kubernetes-sigs/descheduler.git synced 2026-01-26 05:14:13 +01:00

Update actualUsageClient

This commit is contained in:
Jan Chaloupka
2024-11-04 18:11:27 +01:00
parent 80f9c0ada6
commit ad18f41b66
8 changed files with 461 additions and 7 deletions

View File

@@ -0,0 +1,121 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metricscollector
import (
"context"
"fmt"
"math"
"sync"
"time"
"k8s.io/klog/v2"
metricsclient "k8s.io/metrics/pkg/client/clientset/versioned"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/wait"
"k8s.io/client-go/kubernetes"
utilptr "k8s.io/utils/ptr"
)
const (
beta float64 = 0.9
)
type MetricsCollector struct {
clientset kubernetes.Interface
metricsClientset metricsclient.Interface
nodes map[string]map[v1.ResourceName]*resource.Quantity
mu sync.Mutex
}
func NewMetricsCollector(clientset kubernetes.Interface, metricsClientset metricsclient.Interface) *MetricsCollector {
return &MetricsCollector{
clientset: clientset,
metricsClientset: metricsClientset,
nodes: make(map[string]map[v1.ResourceName]*resource.Quantity),
}
}
func (mc *MetricsCollector) Run(ctx context.Context) {
wait.NonSlidingUntil(func() {
mc.Collect(ctx)
}, 5*time.Second, ctx.Done())
}
func weightedAverage(prevValue, value int64) int64 {
return int64(math.Floor(beta*float64(prevValue) + (1-beta)*float64(value)))
}
func (mc *MetricsCollector) NodeUsage(node *v1.Node) (map[v1.ResourceName]*resource.Quantity, error) {
mc.mu.Lock()
defer mc.mu.Unlock()
if _, exists := mc.nodes[node.Name]; !exists {
klog.V(4).Infof("unable to find node %q in the collected metrics", node.Name)
return nil, fmt.Errorf("unable to find node %q in the collected metrics", node.Name)
}
return map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: utilptr.To[resource.Quantity](mc.nodes[node.Name][v1.ResourceCPU].DeepCopy()),
v1.ResourceMemory: utilptr.To[resource.Quantity](mc.nodes[node.Name][v1.ResourceMemory].DeepCopy()),
}, nil
}
func (mc *MetricsCollector) Collect(ctx context.Context) error {
mc.mu.Lock()
defer mc.mu.Unlock()
nodes, err := mc.clientset.CoreV1().Nodes().List(context.TODO(), metav1.ListOptions{})
if err != nil {
return fmt.Errorf("unable to list nodes: %v", err)
}
for _, node := range nodes.Items {
metrics, err := mc.metricsClientset.MetricsV1beta1().NodeMetricses().Get(context.TODO(), node.Name, metav1.GetOptions{})
if err != nil {
fmt.Printf("Error fetching metrics for node %s: %v\n", node.Name, err)
// No entry -> duplicate the previous value -> do nothing as beta*PV + (1-beta)*PV = PV
continue
}
if _, exists := mc.nodes[node.Name]; !exists {
mc.nodes[node.Name] = map[v1.ResourceName]*resource.Quantity{
v1.ResourceCPU: utilptr.To[resource.Quantity](metrics.Usage.Cpu().DeepCopy()),
v1.ResourceMemory: utilptr.To[resource.Quantity](metrics.Usage.Memory().DeepCopy()),
}
} else {
// get MilliValue to reduce loss of precision
mc.nodes[node.Name][v1.ResourceCPU].SetMilli(
weightedAverage(mc.nodes[node.Name][v1.ResourceCPU].MilliValue(), metrics.Usage.Cpu().MilliValue()),
)
mc.nodes[node.Name][v1.ResourceMemory].SetMilli(
weightedAverage(mc.nodes[node.Name][v1.ResourceMemory].MilliValue(), metrics.Usage.Memory().MilliValue()),
)
}
// Display CPU and memory usage
fmt.Printf("%s: %vm, %vMi\n", node.Name, metrics.Usage.Cpu().MilliValue(), metrics.Usage.Memory().Value()/(1024*1024))
fmt.Printf("%s: %vm, %vMi\n", node.Name, mc.nodes[node.Name][v1.ResourceCPU].MilliValue(), mc.nodes[node.Name][v1.ResourceMemory].Value()/(1024*1024))
}
fmt.Printf("--\n")
return nil
}

View File

@@ -0,0 +1,103 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package metricscollector
import (
"context"
"os"
"testing"
"k8s.io/client-go/kubernetes"
"k8s.io/client-go/tools/clientcmd"
metricsclient "k8s.io/metrics/pkg/client/clientset/versioned"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime/schema"
fakeclientset "k8s.io/client-go/kubernetes/fake"
fakemetricsclient "k8s.io/metrics/pkg/client/clientset/versioned/fake"
"sigs.k8s.io/descheduler/test"
)
func TestMetricsCollector1(t *testing.T) {
kubeconfig := os.Getenv("KUBECONFIG")
// Use the kubeconfig to build the Kubernetes client
config, err := clientcmd.BuildConfigFromFlags("", kubeconfig)
if err != nil {
panic(err.Error())
}
// Create the standard Kubernetes clientset
clientset, err := kubernetes.NewForConfig(config)
if err != nil {
panic(err.Error())
}
// Create the metrics clientset to access the metrics.k8s.io API
metricsClientset, err := metricsclient.NewForConfig(config)
if err != nil {
panic(err.Error())
}
collector := NewMetricsCollector(clientset, metricsClientset)
collector.Run(context.TODO())
// collector.Collect(context.TODO())
}
func checkCpuNodeUsage(t *testing.T, usage map[v1.ResourceName]*resource.Quantity, millicpu int64) {
t.Logf("current node cpu usage: %v\n", usage[v1.ResourceCPU].MilliValue())
if usage[v1.ResourceCPU].MilliValue() != millicpu {
t.Fatalf("cpu node usage expected to be %v, got %v instead", millicpu, usage[v1.ResourceCPU].MilliValue())
}
}
func TestMetricsCollector2(t *testing.T) {
gvr := schema.GroupVersionResource{Group: "metrics.k8s.io", Version: "v1beta1", Resource: "nodemetricses"}
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
n3 := test.BuildTestNode("n3", 2000, 3000, 10, nil)
n1metrics := test.BuildNodeMetrics("n1", 400, 1714978816)
n2metrics := test.BuildNodeMetrics("n2", 1400, 1714978816)
n3metrics := test.BuildNodeMetrics("n3", 300, 1714978816)
clientset := fakeclientset.NewSimpleClientset(n1, n2, n3)
metricsClientset := fakemetricsclient.NewSimpleClientset(n1metrics, n2metrics, n3metrics)
t.Logf("Set initial node cpu usage to 1400")
collector := NewMetricsCollector(clientset, metricsClientset)
collector.Collect(context.TODO())
nodesUsage, _ := collector.NodeUsage(n2)
checkCpuNodeUsage(t, nodesUsage, 1400)
t.Logf("Set current node cpu usage to 500")
n2metrics.Usage[v1.ResourceCPU] = *resource.NewMilliQuantity(500, resource.DecimalSI)
metricsClientset.Tracker().Update(gvr, n2metrics, "")
collector.Collect(context.TODO())
nodesUsage, _ = collector.NodeUsage(n2)
checkCpuNodeUsage(t, nodesUsage, 1310)
t.Logf("Set current node cpu usage to 500")
n2metrics.Usage[v1.ResourceCPU] = *resource.NewMilliQuantity(900, resource.DecimalSI)
metricsClientset.Tracker().Update(gvr, n2metrics, "")
collector.Collect(context.TODO())
nodesUsage, _ = collector.NodeUsage(n2)
checkCpuNodeUsage(t, nodesUsage, 1268)
}

View File

@@ -44,7 +44,7 @@ type HighNodeUtilization struct {
underutilizationCriteria []interface{}
resourceNames []v1.ResourceName
targetThresholds api.ResourceThresholds
usageSnapshot *usageSnapshot
usageSnapshot usageClient
}
var _ frameworktypes.BalancePlugin = &HighNodeUtilization{}

View File

@@ -43,7 +43,7 @@ type LowNodeUtilization struct {
underutilizationCriteria []interface{}
overutilizationCriteria []interface{}
resourceNames []v1.ResourceName
usageSnapshot *usageSnapshot
usageSnapshot usageClient
}
var _ frameworktypes.BalancePlugin = &LowNodeUtilization{}

View File

@@ -74,14 +74,12 @@ func normalizePercentage(percent api.Percentage) api.Percentage {
return percent
}
func getNodeThresholds(
nodes []*v1.Node,
lowThreshold, highThreshold api.ResourceThresholds,
resourceNames []v1.ResourceName,
useDeviationThresholds bool,
usageClient *requestedUsageClient,
usageClient usageClient,
) map[string]NodeThresholds {
nodeThresholdsMap := map[string]NodeThresholds{}
@@ -123,7 +121,7 @@ func getNodeThresholds(
func getNodeUsage(
nodes []*v1.Node,
usageClient *requestedUsageClient,
usageClient usageClient,
) []NodeUsage {
var nodeUsageList []NodeUsage
@@ -432,7 +430,7 @@ func classifyPods(pods []*v1.Pod, filter func(pod *v1.Pod) bool) ([]*v1.Pod, []*
return nonRemovablePods, removablePods
}
func averageNodeBasicresources(nodes []*v1.Node, usageClient *requestedUsageClient) api.ResourceThresholds {
func averageNodeBasicresources(nodes []*v1.Node, usageClient usageClient) api.ResourceThresholds {
total := api.ResourceThresholds{}
average := api.ResourceThresholds{}
numberOfNodes := len(nodes)

View File

@@ -20,11 +20,20 @@ import (
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/klog/v2"
metricsclient "k8s.io/metrics/pkg/client/clientset/versioned"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/pkg/utils"
)
type usageClient interface {
nodeUtilization(node string) map[v1.ResourceName]*resource.Quantity
nodes() []*v1.Node
pods(node string) []*v1.Pod
capture(nodes []*v1.Node) error
}
type requestedUsageClient struct {
resourceNames []v1.ResourceName
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc
@@ -34,6 +43,8 @@ type requestedUsageClient struct {
_nodeUtilization map[string]map[v1.ResourceName]*resource.Quantity
}
var _ usageClient = &requestedUsageClient{}
func newRequestedUsageSnapshot(
resourceNames []v1.ResourceName,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
@@ -59,6 +70,7 @@ func (s *requestedUsageClient) pods(node string) []*v1.Pod {
func (s *requestedUsageClient) capture(nodes []*v1.Node) error {
s._nodeUtilization = make(map[string]map[v1.ResourceName]*resource.Quantity)
s._pods = make(map[string][]*v1.Pod)
capturedNodes := []*v1.Node{}
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(node.Name, s.getPodsAssignedToNode, nil)
@@ -78,7 +90,77 @@ func (s *requestedUsageClient) capture(nodes []*v1.Node) error {
// store the snapshot of pods from the same (or the closest) node utilization computation
s._pods[node.Name] = pods
s._nodeUtilization[node.Name] = nodeUsage
capturedNodes = append(capturedNodes, node)
}
s._nodes = capturedNodes
return nil
}
type actualUsageClient struct {
resourceNames []v1.ResourceName
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc
metricsCollector *metricscollector.MetricsCollector
metricsClientset metricsclient.Interface
_nodes []*v1.Node
_pods map[string][]*v1.Pod
_nodeUtilization map[string]map[v1.ResourceName]*resource.Quantity
}
var _ usageClient = &actualUsageClient{}
func newActualUsageSnapshot(
resourceNames []v1.ResourceName,
getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc,
metricsCollector *metricscollector.MetricsCollector,
metricsClientset metricsclient.Interface,
) *actualUsageClient {
return &actualUsageClient{
resourceNames: resourceNames,
getPodsAssignedToNode: getPodsAssignedToNode,
metricsCollector: metricsCollector,
metricsClientset: metricsClientset,
}
}
func (client *actualUsageClient) nodeUtilization(node string) map[v1.ResourceName]*resource.Quantity {
return client._nodeUtilization[node]
}
func (client *actualUsageClient) nodes() []*v1.Node {
return client._nodes
}
func (client *actualUsageClient) pods(node string) []*v1.Pod {
return client._pods[node]
}
func (client *actualUsageClient) capture(nodes []*v1.Node) error {
client._nodeUtilization = make(map[string]map[v1.ResourceName]*resource.Quantity)
client._pods = make(map[string][]*v1.Pod)
capturedNodes := []*v1.Node{}
for _, node := range nodes {
pods, err := podutil.ListPodsOnANode(node.Name, client.getPodsAssignedToNode, nil)
if err != nil {
klog.V(2).InfoS("Node will not be processed, error accessing its pods", "node", klog.KObj(node), "err", err)
continue
}
nodeUsage, err := client.metricsCollector.NodeUsage(node)
if err != nil {
return err
}
// store the snapshot of pods from the same (or the closest) node utilization computation
client._pods[node.Name] = pods
client._nodeUtilization[node.Name] = nodeUsage
capturedNodes = append(capturedNodes, node)
}
client._nodes = capturedNodes
return nil
}

View File

@@ -0,0 +1,136 @@
/*
Copyright 2024 The Kubernetes Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package nodeutilization
import (
"context"
"fmt"
"testing"
v1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/resource"
"k8s.io/apimachinery/pkg/runtime/schema"
"k8s.io/client-go/informers"
fakeclientset "k8s.io/client-go/kubernetes/fake"
"k8s.io/metrics/pkg/apis/metrics/v1beta1"
fakemetricsclient "k8s.io/metrics/pkg/client/clientset/versioned/fake"
"sigs.k8s.io/descheduler/pkg/descheduler/metricscollector"
podutil "sigs.k8s.io/descheduler/pkg/descheduler/pod"
"sigs.k8s.io/descheduler/test"
)
var gvr = schema.GroupVersionResource{Group: "metrics.k8s.io", Version: "v1beta1", Resource: "nodemetricses"}
func updateMetricsAndCheckNodeUtilization(
t *testing.T,
ctx context.Context,
newValue, expectedValue int64,
metricsClientset *fakemetricsclient.Clientset,
collector *metricscollector.MetricsCollector,
usageSnapshot usageClient,
nodes []*v1.Node,
nodeName string,
nodemetrics *v1beta1.NodeMetrics,
) {
t.Logf("Set current node cpu usage to %v", newValue)
nodemetrics.Usage[v1.ResourceCPU] = *resource.NewMilliQuantity(newValue, resource.DecimalSI)
metricsClientset.Tracker().Update(gvr, nodemetrics, "")
err := collector.Collect(ctx)
if err != nil {
t.Fatalf("failed to capture metrics: %v", err)
}
err = usageSnapshot.capture(nodes)
if err != nil {
t.Fatalf("failed to capture a snapshot: %v", err)
}
nodeUtilization := usageSnapshot.nodeUtilization(nodeName)
t.Logf("current node cpu usage: %v\n", nodeUtilization[v1.ResourceCPU].MilliValue())
if nodeUtilization[v1.ResourceCPU].MilliValue() != expectedValue {
t.Fatalf("cpu node usage expected to be %v, got %v instead", expectedValue, nodeUtilization[v1.ResourceCPU].MilliValue())
}
pods := usageSnapshot.pods(nodeName)
fmt.Printf("pods: %#v\n", pods)
if len(pods) != 2 {
t.Fatalf("expected 2 pods for node %v, got %v instead", nodeName, len(pods))
}
capturedNodes := usageSnapshot.nodes()
if len(capturedNodes) != 3 {
t.Fatalf("expected 3 captured node, got %v instead", len(capturedNodes))
}
}
func TestActualUsageClient(t *testing.T) {
n1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
n2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
n3 := test.BuildTestNode("n3", 2000, 3000, 10, nil)
p1 := test.BuildTestPod("p1", 400, 0, n1.Name, nil)
p21 := test.BuildTestPod("p21", 400, 0, n2.Name, nil)
p22 := test.BuildTestPod("p22", 400, 0, n2.Name, nil)
p3 := test.BuildTestPod("p3", 400, 0, n3.Name, nil)
nodes := []*v1.Node{n1, n2, n3}
n1metrics := test.BuildNodeMetrics("n1", 400, 1714978816)
n2metrics := test.BuildNodeMetrics("n2", 1400, 1714978816)
n3metrics := test.BuildNodeMetrics("n3", 300, 1714978816)
clientset := fakeclientset.NewSimpleClientset(n1, n2, n3, p1, p21, p22, p3)
metricsClientset := fakemetricsclient.NewSimpleClientset(n1metrics, n2metrics, n3metrics)
ctx := context.TODO()
resourceNames := []v1.ResourceName{
v1.ResourceCPU,
v1.ResourceMemory,
}
sharedInformerFactory := informers.NewSharedInformerFactory(clientset, 0)
podInformer := sharedInformerFactory.Core().V1().Pods().Informer()
podsAssignedToNode, err := podutil.BuildGetPodsAssignedToNodeFunc(podInformer)
if err != nil {
t.Fatalf("Build get pods assigned to node function error: %v", err)
}
sharedInformerFactory.Start(ctx.Done())
sharedInformerFactory.WaitForCacheSync(ctx.Done())
collector := metricscollector.NewMetricsCollector(clientset, metricsClientset)
usageSnapshot := newActualUsageSnapshot(
resourceNames,
podsAssignedToNode,
collector,
metricsClientset,
)
updateMetricsAndCheckNodeUtilization(t, ctx,
1400, 1400,
metricsClientset, collector, usageSnapshot, nodes, n2.Name, n2metrics,
)
updateMetricsAndCheckNodeUtilization(t, ctx,
500, 1310,
metricsClientset, collector, usageSnapshot, nodes, n2.Name, n2metrics,
)
updateMetricsAndCheckNodeUtilization(t, ctx,
900, 1269,
metricsClientset, collector, usageSnapshot, nodes, n2.Name, n2metrics,
)
}

View File

@@ -31,6 +31,7 @@ import (
"k8s.io/apimachinery/pkg/util/uuid"
"k8s.io/apimachinery/pkg/util/wait"
clientset "k8s.io/client-go/kubernetes"
"k8s.io/metrics/pkg/apis/metrics/v1beta1"
utilptr "k8s.io/utils/ptr"
)
@@ -135,6 +136,19 @@ func BuildTestNode(name string, millicpu, mem, pods int64, apply func(*v1.Node))
return node
}
func BuildNodeMetrics(name string, millicpu, mem int64) *v1beta1.NodeMetrics {
return &v1beta1.NodeMetrics{
ObjectMeta: metav1.ObjectMeta{
Name: name,
},
Window: metav1.Duration{Duration: 20010000000},
Usage: v1.ResourceList{
v1.ResourceCPU: *resource.NewMilliQuantity(millicpu, resource.DecimalSI),
v1.ResourceMemory: *resource.NewQuantity(mem, resource.BinarySI),
},
}
}
// MakeBestEffortPod makes the given pod a BestEffort pod
func MakeBestEffortPod(pod *v1.Pod) {
pod.Spec.Containers[0].Resources.Requests = nil