
Set up the pod evictor only once

Currently, the pod evictor is created during each descheduling cycle
to reset the internal counters and the fake client (in case a dry run is
configured). Instead, create the pod evictor once and reset only what's
needed. This way the pod evictor can later be extended with e.g. a cache
keeping track of eviction requests that are still in progress and
require more than a single descheduling cycle to complete.
Jan Chaloupka
2024-06-23 18:15:44 +02:00
parent 0901cb18bf
commit 75880226c0
3 changed files with 143 additions and 35 deletions
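
Before the diffs, a minimal, self-contained Go sketch of the lifecycle the commit message describes. The types and names below are invented stand-ins, not the descheduler's actual API (the real change follows): the evictor is built once, and each cycle only swaps the client and resets the per-cycle counters.

	package main

	import "fmt"

	// evictor stands in for evictions.PodEvictor: built once, it survives
	// across descheduling cycles and can later accumulate state between
	// them (e.g. a cache of in-flight eviction requests).
	type evictor struct {
		client  string          // stand-in for clientset.Interface
		perNode map[string]uint // per-cycle eviction counts keyed by node
	}

	func (e *evictor) SetClient(c string) { e.client = c }

	func (e *evictor) ResetCounters() { e.perNode = make(map[string]uint) }

	func main() {
		e := &evictor{} // created once; the client is filled in per cycle
		for cycle, dryRun := range []bool{false, true} {
			client := "real"
			if dryRun {
				// A dry run swaps in a fake client instead of rebuilding the evictor.
				client = "fake"
			}
			e.SetClient(client)
			e.ResetCounters() // per-node limits start from zero each cycle
			fmt.Printf("cycle %d runs against the %s client\n", cycle, client)
		}
	}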

pkg/descheduler/descheduler.go

@@ -77,9 +77,9 @@ type descheduler struct {
 	priorityClassLister   schedulingv1.PriorityClassLister
 	getPodsAssignedToNode podutil.GetPodsAssignedToNodeFunc
 	sharedInformerFactory informers.SharedInformerFactory
-	evictionPolicyGroupVersion string
 	deschedulerPolicy     *api.DeschedulerPolicy
 	eventRecorder         events.EventRecorder
+	podEvictor            *evictions.PodEvictor
 }

 func newDescheduler(rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, eventRecorder events.EventRecorder, sharedInformerFactory informers.SharedInformerFactory) (*descheduler, error) {
@@ -94,6 +94,16 @@ func newDescheduler(rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, eventRecorder events.EventRecorder, sharedInformerFactory informers.SharedInformerFactory) (*descheduler, error) {
 		return nil, fmt.Errorf("build get pods assigned to node function error: %v", err)
 	}

+	podEvictor := evictions.NewPodEvictor(
+		nil,
+		evictionPolicyGroupVersion,
+		rs.DryRun,
+		deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
+		deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
+		!rs.DisableMetrics,
+		eventRecorder,
+	)
+
 	return &descheduler{
 		rs:        rs,
 		podLister: podLister,
@@ -102,9 +112,9 @@ func newDescheduler(rs *options.DeschedulerServer, deschedulerPolicy *api.DeschedulerPolicy, evictionPolicyGroupVersion string, eventRecorder events.EventRecorder, sharedInformerFactory informers.SharedInformerFactory) (*descheduler, error) {
 		priorityClassLister:   priorityClassLister,
 		getPodsAssignedToNode: getPodsAssignedToNode,
 		sharedInformerFactory: sharedInformerFactory,
-		evictionPolicyGroupVersion: evictionPolicyGroupVersion,
 		deschedulerPolicy:     deschedulerPolicy,
 		eventRecorder:         eventRecorder,
+		podEvictor:            podEvictor,
 	}, nil
 }
@@ -153,20 +163,13 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
 		client = d.rs.Client
 	}

-	klog.V(3).Infof("Building a pod evictor")
-	podEvictor := evictions.NewPodEvictor(
-		client,
-		d.evictionPolicyGroupVersion,
-		d.rs.DryRun,
-		d.deschedulerPolicy.MaxNoOfPodsToEvictPerNode,
-		d.deschedulerPolicy.MaxNoOfPodsToEvictPerNamespace,
-		!d.rs.DisableMetrics,
-		d.eventRecorder,
-	)
+	klog.V(3).Infof("Setting up the pod evictor")
+	d.podEvictor.SetClient(client)
+	d.podEvictor.ResetCounters()

-	d.runProfiles(ctx, client, nodes, podEvictor)
+	d.runProfiles(ctx, client, nodes)

-	klog.V(1).InfoS("Number of evicted pods", "totalEvicted", podEvictor.TotalEvicted())
+	klog.V(1).InfoS("Number of evicted pods", "totalEvicted", d.podEvictor.TotalEvicted())

 	return nil
 }
@@ -174,7 +177,7 @@ func (d *descheduler) runDeschedulerLoop(ctx context.Context, nodes []*v1.Node)
 // runProfiles runs all the deschedule plugins of all profiles and
 // later runs through all balance plugins of all profiles. (All Balance plugins should come after all Deschedule plugins)
 // see https://github.com/kubernetes-sigs/descheduler/issues/979
-func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interface, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
+func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interface, nodes []*v1.Node) {
 	var span trace.Span
 	ctx, span = tracing.Tracer().Start(ctx, "runProfiles")
 	defer span.End()
@@ -185,7 +188,7 @@ func (d *descheduler) runProfiles(ctx context.Context, client clientset.Interface, nodes []*v1.Node, podEvictor *evictions.PodEvictor) {
 		pluginregistry.PluginRegistry,
 		frameworkprofile.WithClientSet(client),
 		frameworkprofile.WithSharedInformerFactory(d.sharedInformerFactory),
-		frameworkprofile.WithPodEvictor(podEvictor),
+		frameworkprofile.WithPodEvictor(d.podEvictor),
 		frameworkprofile.WithGetPodsAssignedToNodeFnc(d.getPodsAssignedToNode),
 	)
 	if err != nil {

pkg/descheduler/descheduler_test.go

@@ -13,15 +13,18 @@ import (
 	"k8s.io/apimachinery/pkg/runtime"
 	apiversion "k8s.io/apimachinery/pkg/version"
 	fakediscovery "k8s.io/client-go/discovery/fake"
 	"k8s.io/client-go/informers"
 	fakeclientset "k8s.io/client-go/kubernetes/fake"
 	core "k8s.io/client-go/testing"
 	"sigs.k8s.io/descheduler/cmd/descheduler/app/options"
+	"sigs.k8s.io/descheduler/pkg/api"
+	"sigs.k8s.io/descheduler/pkg/api/v1alpha1"
 	nodeutil "sigs.k8s.io/descheduler/pkg/descheduler/node"
 	"sigs.k8s.io/descheduler/pkg/framework/pluginregistry"
 	"sigs.k8s.io/descheduler/pkg/framework/plugins/defaultevictor"
+	"sigs.k8s.io/descheduler/pkg/framework/plugins/removeduplicates"
 	"sigs.k8s.io/descheduler/pkg/framework/plugins/removepodsviolatingnodetaints"
 	"sigs.k8s.io/descheduler/pkg/utils"
 	deschedulerversion "sigs.k8s.io/descheduler/pkg/version"
 	"sigs.k8s.io/descheduler/test"
 )
@@ -174,7 +177,7 @@ func TestDuplicate(t *testing.T) {
 	}
 	if len(evictedPods) == 0 {
-		t.Fatalf("Unable to evict pod, node taint did not get propagated to descheduler strategies %v\n", err)
+		t.Fatalf("Unable to evict pods\n")
 	}
 }
@@ -323,3 +326,96 @@ func podEvictionReactionFuc(evictedPods *[]string) func(action core.Action) (bool, runtime.Object, error) {
 		return false, nil, nil // fallback to the default reactor
 	}
 }
+
+func initPluginRegistry() {
+	pluginregistry.PluginRegistry = pluginregistry.NewRegistry()
+	pluginregistry.Register(removeduplicates.PluginName, removeduplicates.New, &removeduplicates.RemoveDuplicates{}, &removeduplicates.RemoveDuplicatesArgs{}, removeduplicates.ValidateRemoveDuplicatesArgs, removeduplicates.SetDefaults_RemoveDuplicatesArgs, pluginregistry.PluginRegistry)
+	pluginregistry.Register(defaultevictor.PluginName, defaultevictor.New, &defaultevictor.DefaultEvictor{}, &defaultevictor.DefaultEvictorArgs{}, defaultevictor.ValidateDefaultEvictorArgs, defaultevictor.SetDefaults_DefaultEvictorArgs, pluginregistry.PluginRegistry)
+}
+
+func TestPodEvictorReset(t *testing.T) {
+	initPluginRegistry()
+
+	ctx := context.Background()
+	node1 := test.BuildTestNode("n1", 2000, 3000, 10, nil)
+	node2 := test.BuildTestNode("n2", 2000, 3000, 10, nil)
+
+	p1 := test.BuildTestPod("p1", 100, 0, node1.Name, nil)
+	p1.Namespace = "dev"
+	p2 := test.BuildTestPod("p2", 100, 0, node1.Name, nil)
+	p2.Namespace = "dev"
+	p3 := test.BuildTestPod("p3", 100, 0, node1.Name, nil)
+	p3.Namespace = "dev"
+	p4 := test.BuildTestPod("p4", 100, 0, node1.Name, nil)
+	p4.Namespace = "dev"
+
+	ownerRef1 := test.GetReplicaSetOwnerRefList()
+	p1.ObjectMeta.OwnerReferences = ownerRef1
+	p2.ObjectMeta.OwnerReferences = ownerRef1
+	p3.ObjectMeta.OwnerReferences = ownerRef1
+	p4.ObjectMeta.OwnerReferences = ownerRef1
+
+	client := fakeclientset.NewSimpleClientset(node1, node2, p1, p2, p3, p4)
+	eventClient := fakeclientset.NewSimpleClientset(node1, node2, p1, p2, p3, p4)
+	dp := &v1alpha1.DeschedulerPolicy{
+		Strategies: v1alpha1.StrategyList{
+			"RemoveDuplicates": v1alpha1.DeschedulerStrategy{
+				Enabled: true,
+			},
+		},
+	}
+
+	internalDeschedulerPolicy := &api.DeschedulerPolicy{}
+	scope := scope{}
+	err := v1alpha1.V1alpha1ToInternal(dp, pluginregistry.PluginRegistry, internalDeschedulerPolicy, scope)
+	if err != nil {
+		t.Fatalf("Unable to convert v1alpha1 to the internal policy: %v", err)
+	}
+
+	rs, err := options.NewDeschedulerServer()
+	if err != nil {
+		t.Fatalf("Unable to initialize server: %v", err)
+	}
+	rs.Client = client
+	rs.EventClient = eventClient
+
+	var evictedPods []string
+	client.PrependReactor("create", "pods", podEvictionReactionFuc(&evictedPods))
+
+	sharedInformerFactory := informers.NewSharedInformerFactoryWithOptions(rs.Client, 0, informers.WithTransform(trimManagedFields))
+	eventBroadcaster, eventRecorder := utils.GetRecorderAndBroadcaster(ctx, client)
+	defer eventBroadcaster.Shutdown()
+
+	descheduler, err := newDescheduler(rs, internalDeschedulerPolicy, "v1", eventRecorder, sharedInformerFactory)
+	if err != nil {
+		t.Fatalf("Unable to create a descheduler instance: %v", err)
+	}
+
+	ctx, cancel := context.WithCancel(ctx)
+	defer cancel()
+	sharedInformerFactory.Start(ctx.Done())
+	sharedInformerFactory.WaitForCacheSync(ctx.Done())
+
+	nodes, err := nodeutil.ReadyNodes(ctx, rs.Client, descheduler.nodeLister, "")
+	if err != nil {
+		t.Fatalf("Unable to get ready nodes: %v", err)
+	}
+
+	// two pod evictions expected in the first cycle
+	err = descheduler.runDeschedulerLoop(ctx, nodes)
+	if err != nil {
+		t.Fatalf("Unable to run a descheduling loop: %v", err)
+	}
+	if descheduler.podEvictor.TotalEvicted() != 2 || len(evictedPods) != 2 {
+		t.Fatalf("Expected (2,2) pods evicted, got (%v, %v) instead", descheduler.podEvictor.TotalEvicted(), len(evictedPods))
+	}
+
+	// two more pod evictions expected; the evictor's counters reset between cycles
+	err = descheduler.runDeschedulerLoop(ctx, nodes)
+	if err != nil {
+		t.Fatalf("Unable to run a descheduling loop: %v", err)
+	}
+	if descheduler.podEvictor.TotalEvicted() != 2 || len(evictedPods) != 4 {
+		t.Fatalf("Expected (2,4) pods evicted, got (%v, %v) instead", descheduler.podEvictor.TotalEvicted(), len(evictedPods))
+	}
+}
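
A note on the expected counts (inferred from RemoveDuplicates' behavior, not stated in the commit): the four ReplicaSet-owned pods are duplicates on n1, and with two ready nodes the strategy evicts the two pods above the per-node average of ceil(4/2) = 2. TotalEvicted therefore reads 2 after each cycle, since ResetCounters zeroes the counters between loops, while the fake reactor's evictedPods slice accumulates across cycles and grows from 2 to 4.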

pkg/descheduler/evictions/evictions.go

@@ -97,6 +97,15 @@ func (pe *PodEvictor) NodeLimitExceeded(node *v1.Node) bool {
 	return false
 }
+
+func (pe *PodEvictor) ResetCounters() {
+	pe.nodepodCount = make(nodePodEvictedCount)
+	pe.namespacePodCount = make(namespacePodEvictCount)
+}
+
+func (pe *PodEvictor) SetClient(client clientset.Interface) {
+	pe.client = client
+}

 // EvictOptions provides a handle for passing additional info to EvictPod
 type EvictOptions struct {
 	// Reason allows for passing details about the specific eviction for logging.
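
ResetCounters reallocates the counter maps rather than deleting keys in place; one consequence is that entries for nodes that disappeared between cycles are dropped for free. A standalone sketch of the idiom, with toy types that only mirror the shape of the fields above, not the real PodEvictor:

	package main

	import "fmt"

	type nodePodEvictedCount map[string]uint

	type podEvictor struct {
		nodepodCount nodePodEvictedCount
	}

	// resetCounters mirrors ResetCounters above: reallocating the map drops
	// every entry in one step, including nodes that no longer exist.
	func (pe *podEvictor) resetCounters() {
		pe.nodepodCount = make(nodePodEvictedCount)
	}

	func main() {
		pe := &podEvictor{nodepodCount: nodePodEvictedCount{"n1": 2, "gone-node": 1}}
		pe.resetCounters()
		fmt.Println(len(pe.nodepodCount)) // 0: fresh per-node limits for the next cycle
	}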