Skip to content

Commit 7ed9f59

Browse files
Progress
1 parent f572681 commit 7ed9f59

File tree

5 files changed

+117
-80
lines changed

5 files changed

+117
-80
lines changed

helm/bundles/cortex-nova/templates/steps.yaml

Lines changed: 19 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -339,22 +339,22 @@ spec:
339339
# the datasources. This should be changed to use the hypervisor CRD.
340340
knowledges: []
341341
---
342-
# apiVersion: scheduling.cortex/v1alpha1
343-
# kind: Step
344-
# metadata:
345-
# name: avoid-high-steal-pct
346-
# spec:
347-
# operator: cortex-nova
348-
# # TODO: Remove this database reference once the scheduler
349-
# # step doesn't need it anymore.
350-
# databaseSecretRef:
351-
# name: cortex-nova-postgres
352-
# namespace: {{ .Release.Namespace }}
353-
# type: descheduler
354-
# impl: avoid_high_steal_pct
355-
# description: |
356-
# This step will deschedule VMs once they reach this CPU steal percentage over
357-
# the observed time span.
358-
# knowledges: []
359-
# opts:
360-
# maxStealPctOverObservedTimeSpan: 20.0
342+
apiVersion: scheduling.cortex/v1alpha1
343+
kind: Step
344+
metadata:
345+
name: avoid-high-steal-pct
346+
spec:
347+
operator: cortex-nova
348+
# TODO: Remove this database reference once the scheduler
349+
# step doesn't need it anymore.
350+
databaseSecretRef:
351+
name: cortex-nova-postgres
352+
namespace: {{ .Release.Namespace }}
353+
type: descheduler
354+
impl: avoid_high_steal_pct
355+
description: |
356+
This step will deschedule VMs once they reach this CPU steal percentage over
357+
the observed time span.
358+
knowledges: []
359+
opts:
360+
maxStealPctOverObservedTimeSpan: 20.0

scheduling/internal/decisions/nova/plugins/vmware/general_purpose_balancing.go

Lines changed: 14 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,16 @@ import (
1515

1616
// Options for the scheduling step, given through the step config in the service yaml file.
1717
type GeneralPurposeBalancingStepOpts struct {
18-
RAMUtilizedAfterLowerBoundPct float64 `json:"ramUtilizedAfterLowerBoundPct"` // -> mapped to ActivationLowerBound
19-
RAMUtilizedAfterUpperBoundPct float64 `json:"ramUtilizedAfterUpperBoundPct"` // -> mapped to ActivationUpperBound
20-
RAMUtilizedAfterActivationLowerBound float64 `json:"ramUtilizedAfterActivationLowerBound"`
21-
RAMUtilizedAfterActivationUpperBound float64 `json:"ramUtilizedAfterActivationUpperBound"`
18+
RAMUtilizedLowerBoundPct float64 `json:"ramUtilizedLowerBoundPct"` // -> mapped to ActivationLowerBound
19+
RAMUtilizedUpperBoundPct float64 `json:"ramUtilizedUpperBoundPct"` // -> mapped to ActivationUpperBound
20+
RAMUtilizedActivationLowerBound float64 `json:"ramUtilizedActivationLowerBound"`
21+
RAMUtilizedActivationUpperBound float64 `json:"ramUtilizedActivationUpperBound"`
2222
}
2323

2424
func (o GeneralPurposeBalancingStepOpts) Validate() error {
2525
// Avoid zero-division during min-max scaling.
26-
if o.RAMUtilizedAfterLowerBoundPct == o.RAMUtilizedAfterUpperBoundPct {
27-
return errors.New("ramUtilizedAfterLowerBound and ramUtilizedAfterUpperBound must not be equal")
26+
if o.RAMUtilizedLowerBoundPct == o.RAMUtilizedUpperBoundPct {
27+
return errors.New("ramUtilizedLowerBound and ramUtilizedUpperBound must not be equal")
2828
}
2929
return nil
3030
}
@@ -48,7 +48,7 @@ func (s *GeneralPurposeBalancingStep) Run(traceLog *slog.Logger, request api.Ext
4848
return result, nil
4949
}
5050

51-
result.Statistics["ram utilized after"] = s.PrepareStats(request, "%")
51+
result.Statistics["ram utilized"] = s.PrepareStats(request, "%")
5252

5353
var hostUtilizations []shared.HostUtilization
5454
group := "scheduler-nova"
@@ -62,18 +62,15 @@ func (s *GeneralPurposeBalancingStep) Run(traceLog *slog.Logger, request api.Ext
6262
if _, ok := result.Activations[hostUtilization.ComputeHost]; !ok {
6363
continue
6464
}
65-
after := hostUtilization.RAMUtilizedPct -
66-
(float64(request.Spec.Data.Flavor.Data.MemoryMB) /
67-
hostUtilization.TotalRAMAllocatableMB * 100)
6865
result.
69-
Statistics["ram utilized after"].
70-
Subjects[hostUtilization.ComputeHost] = after
66+
Statistics["ram utilized"].
67+
Subjects[hostUtilization.ComputeHost] = hostUtilization.RAMUtilizedPct
7168
result.Activations[hostUtilization.ComputeHost] = scheduling.MinMaxScale(
72-
after,
73-
s.Options.RAMUtilizedAfterLowerBoundPct,
74-
s.Options.RAMUtilizedAfterUpperBoundPct,
75-
s.Options.RAMUtilizedAfterActivationLowerBound,
76-
s.Options.RAMUtilizedAfterActivationUpperBound,
69+
hostUtilization.RAMUtilizedPct,
70+
s.Options.RAMUtilizedLowerBoundPct,
71+
s.Options.RAMUtilizedUpperBoundPct,
72+
s.Options.RAMUtilizedActivationLowerBound,
73+
s.Options.RAMUtilizedActivationUpperBound,
7774
)
7875
}
7976

scheduling/internal/decisions/nova/plugins/vmware/general_purpose_balancing_test.go

Lines changed: 82 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -22,16 +22,16 @@ func TestGeneralPurposeBalancingStepOpts_Validate(t *testing.T) {
2222
{
2323
name: "valid options",
2424
opts: GeneralPurposeBalancingStepOpts{
25-
RAMUtilizedAfterLowerBoundPct: 20.0,
26-
RAMUtilizedAfterUpperBoundPct: 80.0,
25+
RAMUtilizedLowerBoundPct: 20.0,
26+
RAMUtilizedUpperBoundPct: 80.0,
2727
},
2828
wantErr: false,
2929
},
3030
{
3131
name: "equal bounds - should error",
3232
opts: GeneralPurposeBalancingStepOpts{
33-
RAMUtilizedAfterLowerBoundPct: 50.0,
34-
RAMUtilizedAfterUpperBoundPct: 50.0,
33+
RAMUtilizedLowerBoundPct: 50.0,
34+
RAMUtilizedUpperBoundPct: 50.0,
3535
},
3636
wantErr: true,
3737
},
@@ -63,21 +63,19 @@ func TestGeneralPurposeBalancingStep_Run(t *testing.T) {
6363
t.Fatalf("expected no error, got %v", err)
6464
}
6565

66+
// Insert test data
6667
hostUtilizations := []any{
6768
&shared.HostUtilization{
68-
ComputeHost: "host1",
69-
RAMUtilizedPct: 60.0,
70-
TotalRAMAllocatableMB: 32768.0,
69+
ComputeHost: "host1",
70+
RAMUtilizedPct: 30.0,
7171
},
7272
&shared.HostUtilization{
73-
ComputeHost: "host2",
74-
RAMUtilizedPct: 40.0,
75-
TotalRAMAllocatableMB: 16384.0,
73+
ComputeHost: "host2",
74+
RAMUtilizedPct: 60.0,
7675
},
7776
&shared.HostUtilization{
78-
ComputeHost: "host3",
79-
RAMUtilizedPct: 80.0,
80-
TotalRAMAllocatableMB: 65536.0,
77+
ComputeHost: "host3",
78+
RAMUtilizedPct: 90.0,
8179
},
8280
}
8381
if err := testDB.Insert(hostUtilizations...); err != nil {
@@ -97,24 +95,29 @@ func TestGeneralPurposeBalancingStep_Run(t *testing.T) {
9795
ComputeHost: "host3",
9896
Traits: "HANA_EXCLUSIVE,OTHER_TRAIT",
9997
},
98+
&shared.HostCapabilities{
99+
ComputeHost: "host4",
100+
Traits: "GENERAL_PURPOSE",
101+
},
100102
}
101103
if err := testDB.Insert(hostCapabilities...); err != nil {
102104
t.Fatalf("expected no error, got %v", err)
103105
}
104106

105107
step := &GeneralPurposeBalancingStep{}
106108
step.Options = GeneralPurposeBalancingStepOpts{
107-
RAMUtilizedAfterLowerBoundPct: 20.0,
108-
RAMUtilizedAfterUpperBoundPct: 70.0,
109-
RAMUtilizedAfterActivationLowerBound: 0.0,
110-
RAMUtilizedAfterActivationUpperBound: 1.0,
109+
RAMUtilizedLowerBoundPct: 20.0,
110+
RAMUtilizedUpperBoundPct: 80.0,
111+
RAMUtilizedActivationLowerBound: 0.0,
112+
RAMUtilizedActivationUpperBound: 1.0,
111113
}
112114
step.DB = &testDB
113115

114116
tests := []struct {
115-
name string
116-
request api.ExternalSchedulerRequest
117-
expectedHosts map[string]bool // true if host should have modified activation
117+
name string
118+
request api.ExternalSchedulerRequest
119+
expectedActivations map[string]float64 // expected activation values
120+
expectStatistics bool // whether statistics should be present
118121
}{
119122
{
120123
name: "General purpose VM on VMware",
@@ -136,11 +139,12 @@ func TestGeneralPurposeBalancingStep_Run(t *testing.T) {
136139
{ComputeHost: "host3"},
137140
},
138141
},
139-
expectedHosts: map[string]bool{
140-
"host1": true, // should get activation (60% - 12.5% = 47.5%, in range 20-70%)
141-
"host2": false, // should be no-effect (40% - 25% = 15%, below 20% range)
142-
"host3": false, // HANA_EXCLUSIVE host should be no-effect
142+
expectedActivations: map[string]float64{
143+
"host1": 0.16666666666666666, // (30-20)/(80-20) = 10/60 = 0.167
144+
"host2": 0.6666666666666666, // (60-20)/(80-20) = 40/60 = 0.667
145+
"host3": 0.0, // HANA_EXCLUSIVE host should be no-effect
143146
},
147+
expectStatistics: true,
144148
},
145149
{
146150
name: "HANA flavor should be skipped",
@@ -161,10 +165,11 @@ func TestGeneralPurposeBalancingStep_Run(t *testing.T) {
161165
{ComputeHost: "host2"},
162166
},
163167
},
164-
expectedHosts: map[string]bool{
165-
"host1": false, // should be no-effect
166-
"host2": false, // should be no-effect
168+
expectedActivations: map[string]float64{
169+
"host1": 0.0, // should be no-effect
170+
"host2": 0.0, // should be no-effect
167171
},
172+
expectStatistics: false,
168173
},
169174
{
170175
name: "Non-VMware VM should be skipped",
@@ -185,10 +190,36 @@ func TestGeneralPurposeBalancingStep_Run(t *testing.T) {
185190
{ComputeHost: "host2"},
186191
},
187192
},
188-
expectedHosts: map[string]bool{
189-
"host1": false, // should be no-effect
190-
"host2": false, // should be no-effect
193+
expectedActivations: map[string]float64{
194+
"host1": 0.0, // should be no-effect
195+
"host2": 0.0, // should be no-effect
191196
},
197+
expectStatistics: false,
198+
},
199+
{
200+
name: "Host without capabilities gets no-effect",
201+
request: api.ExternalSchedulerRequest{
202+
Spec: api.NovaObject[api.NovaSpec]{
203+
Data: api.NovaSpec{
204+
Flavor: api.NovaObject[api.NovaFlavor]{
205+
Data: api.NovaFlavor{
206+
Name: "m1.small",
207+
MemoryMB: 2048,
208+
},
209+
},
210+
},
211+
},
212+
VMware: true,
213+
Hosts: []api.ExternalSchedulerHost{
214+
{ComputeHost: "host1"},
215+
{ComputeHost: "host_unknown"}, // no capabilities for this host
216+
},
217+
},
218+
expectedActivations: map[string]float64{
219+
"host1": 0.16666666666666666, // normal scaling
220+
"host_unknown": 0.0, // no-effect due to missing capabilities
221+
},
222+
expectStatistics: true,
192223
},
193224
}
194225

@@ -203,32 +234,39 @@ func TestGeneralPurposeBalancingStep_Run(t *testing.T) {
203234
t.Fatal("expected result, got nil")
204235
}
205236

206-
for host, shouldHaveActivation := range tt.expectedHosts {
237+
// Check activations
238+
for host, expectedActivation := range tt.expectedActivations {
207239
activation, ok := result.Activations[host]
208240
if !ok {
209241
t.Errorf("expected activation for host %s", host)
210242
continue
211243
}
212244

213-
if shouldHaveActivation {
214-
// For general purpose balancing, we expect some calculated activation based on RAM utilization
215-
if activation == 0 {
216-
t.Errorf("expected non-zero activation for host %s, got %f", host, activation)
217-
}
218-
} else {
219-
// Should be no-effect (0)
220-
if activation != 0 {
221-
t.Errorf("expected no-effect (0) activation for host %s, got %f", host, activation)
222-
}
245+
// Use a small epsilon for floating point comparison
246+
epsilon := 1e-10
247+
if abs(activation-expectedActivation) > epsilon {
248+
t.Errorf("expected activation %.10f for host %s, got %.10f", expectedActivation, host, activation)
223249
}
224250
}
225251

226252
// Check statistics
227-
if tt.name == "General purpose VM on VMware" {
228-
if _, ok := result.Statistics["ram utilized after"]; !ok {
229-
t.Error("expected 'ram utilized after' statistic")
253+
if tt.expectStatistics {
254+
if _, ok := result.Statistics["ram utilized"]; !ok {
255+
t.Error("expected 'ram utilized' statistic")
256+
}
257+
} else {
258+
if _, ok := result.Statistics["ram utilized"]; ok {
259+
t.Error("did not expect 'ram utilized' statistic")
230260
}
231261
}
232262
})
233263
}
234264
}
265+
266+
// Helper function for floating point comparison
267+
func abs(x float64) float64 {
268+
if x < 0 {
269+
return -x
270+
}
271+
return x
272+
}

scheduling/internal/descheduling/nova/pipeline_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ type DeschedulingsPipelineController struct {
4343
// The base controller will delegate the pipeline creation down to this method.
4444
func (c *DeschedulingsPipelineController) InitPipeline(ctx context.Context, steps []v1alpha1.Step) (*Pipeline, error) {
4545
pipeline := &Pipeline{
46+
Client: c.Client,
4647
CycleDetector: c.CycleDetector,
4748
Monitor: c.Monitor,
4849
}

scheduling/internal/lib/pipeline_controller.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ func (c *BasePipelineController[PipelineType]) handlePipelineChange(
121121
}
122122
// Delegate to the parent controller which knows how to create the pipeline.
123123
if existingPipeline, exists := c.Pipelines[obj.Name]; exists {
124+
log.Info("deinitializing existing pipeline before reinit", "pipelineName", obj.Name)
124125
if err := existingPipeline.Deinit(ctx); err != nil {
125126
log.Error(err, "failed to deinitialize existing pipeline", "pipelineName", obj.Name)
126127
}

0 commit comments

Comments
 (0)