Commit 8cc33c0

scheduler: system deployments need to always take into account infeasible nodes
Canary calculation is off for clusters with infeasible nodes.
1 parent 48863bd commit 8cc33c0
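
The fix adjusts how many canary nodes a system deployment asks for: the task group's update Canary percentage is applied to the number of eligible nodes, rounded up, and nodes already known to be infeasible for that task group are subtracted, so the deployment no longer waits on canaries that can never be placed. A minimal, self-contained sketch of that arithmetic follows; it is not the Nomad implementation, and the function name and the clamp to zero are illustrative.

package main

import (
    "fmt"
    "math"
)

// canaryNodeCount sketches the adjusted calculation: canaryPercent is the
// task group's update.canary value, eligible is the number of eligible
// nodes, and infeasible is how many of them cannot run the task group.
func canaryNodeCount(canaryPercent, eligible, infeasible int) int {
    // round up to the nearest integer, then drop infeasible nodes
    n := int(math.Ceil(float64(canaryPercent)*float64(eligible)/100)) - infeasible
    if n < 0 {
        // clamp added for safety in this sketch only
        return 0
    }
    return n
}

func main() {
    // 50% canaries across 4 eligible nodes, 1 of which is infeasible -> 1 canary
    fmt.Println(canaryNodeCount(50, 4, 1))
}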

2 files changed: 57 additions & 20 deletions


scheduler/reconciler/reconcile_node.go

Lines changed: 30 additions & 8 deletions
@@ -32,13 +32,14 @@ func NewNodeReconciler(deployment *structs.Deployment) *NodeReconciler {
     }
 }
 
-// Compute is like diffSystemAllocsForNode however, the allocations in the
-// diffResult contain the specific nodeID they should be allocated on.
+// Compute is like computeCanaryNodes however, the allocations in the
+// NodeReconcileResult contain the specific nodeID they should be allocated on.
 func (nr *NodeReconciler) Compute(
     job *structs.Job, // jobs whose allocations are going to be diff-ed
     readyNodes []*structs.Node, // list of nodes in the ready state
     notReadyNodes map[string]struct{}, // list of nodes in DC but not ready, e.g. draining
     taintedNodes map[string]*structs.Node, // nodes which are down or drain mode (by node id)
+    infeasibleNodes map[string][]string, // maps task groups to node IDs that are not feasible for them
     live []*structs.Allocation, // non-terminal allocations
     terminal structs.TerminalByNodeByName, // latest terminal allocations (by node id)
     serverSupportsDisconnectedClients bool, // flag indicating whether to apply disconnected client logic
@@ -64,7 +65,7 @@ func (nr *NodeReconciler) Compute(
     // Canary deployments deploy to the TaskGroup.UpdateStrategy.Canary
     // percentage of eligible nodes, so we create a mapping of task group name
     // to a list of nodes that canaries should be placed on.
-    canaryNodes, canariesPerTG := nr.computeCanaryNodes(required, nodeAllocs, terminal, eligibleNodes)
+    canaryNodes, canariesPerTG := nr.computeCanaryNodes(required, nodeAllocs, terminal, eligibleNodes, infeasibleNodes)
 
     compatHadExistingDeployment := nr.DeploymentCurrent != nil
 
@@ -102,7 +103,7 @@
 // many total canaries are to be placed for a TG.
 func (nr *NodeReconciler) computeCanaryNodes(required map[string]*structs.TaskGroup,
     liveAllocs map[string][]*structs.Allocation, terminalAllocs structs.TerminalByNodeByName,
-    eligibleNodes map[string]*structs.Node) (map[string]map[string]bool, map[string]int) {
+    eligibleNodes map[string]*structs.Node, infeasibleNodes map[string][]string) (map[string]map[string]bool, map[string]int) {
 
     canaryNodes := map[string]map[string]bool{}
     eligibleNodesList := slices.Collect(maps.Values(eligibleNodes))
@@ -114,7 +115,17 @@ func (nr *NodeReconciler) computeCanaryNodes(required map[string]*structs.TaskGr
         }
 
         // round up to the nearest integer
-        numberOfCanaryNodes := int(math.Ceil(float64(tg.Update.Canary) * float64(len(eligibleNodes)) / 100))
+        numberOfCanaryNodes := int(math.Ceil(float64(tg.Update.Canary)*float64(len(eligibleNodes))/100)) - len(infeasibleNodes[tg.Name])
+
+        // check if there's a current deployment present. It could be that the
+        // desired amount of canaries has to be reduced due to infeasible nodes.
+        // if nr.DeploymentCurrent != nil {
+        //     if dstate, ok := nr.DeploymentCurrent.TaskGroups[tg.Name]; ok {
+        //         numberOfCanaryNodes = dstate.DesiredCanaries
+        //         fmt.Printf("existing deploy, setting number of canary nodes to %v\n", dstate.DesiredCanaries)
+        //     }
+        // }
+
         canariesPerTG[tg.Name] = numberOfCanaryNodes
 
         // check if there are any live allocations on any nodes that are/were
@@ -135,6 +146,10 @@ func (nr *NodeReconciler) computeCanaryNodes(required map[string]*structs.TaskGr
         }
 
         for i, n := range eligibleNodesList {
+            // infeasible nodes can never become canary candidates
+            if slices.Contains(infeasibleNodes[tg.Name], n.ID) {
+                continue
+            }
             if i > numberOfCanaryNodes-1 {
                 break
             }
@@ -441,10 +456,10 @@ func (nr *NodeReconciler) computeForNode(
             dstate.ProgressDeadline = tg.Update.ProgressDeadline
         }
         dstate.DesiredTotal = len(eligibleNodes)
-    }
 
-    if isCanarying[tg.Name] && !dstate.Promoted {
-        dstate.DesiredCanaries = canariesPerTG[tg.Name]
+        if isCanarying[tg.Name] && !dstate.Promoted {
+            dstate.DesiredCanaries = canariesPerTG[tg.Name]
+        }
     }
 
     // Check for an existing allocation
@@ -587,14 +602,21 @@ func (nr *NodeReconciler) createDeployment(job *structs.Job, tg *structs.TaskGro
 }
 
 func (nr *NodeReconciler) isDeploymentComplete(groupName string, buckets *NodeReconcileResult, isCanarying bool) bool {
+    fmt.Printf("\n===========\n")
+    fmt.Println("isDeploymentComplete call")
     complete := len(buckets.Place)+len(buckets.Migrate)+len(buckets.Update) == 0
 
+    fmt.Printf("\nis complete? %v buckets.Place: %v buckets.Update: %v\n", complete, len(buckets.Place), len(buckets.Update))
+    fmt.Printf("\nnr.deploymentCurrent == nil? %v isCanarying?: %v\n", nr.DeploymentCurrent == nil, isCanarying)
+    fmt.Println("===========")
+
     if !complete || nr.DeploymentCurrent == nil || isCanarying {
         return false
     }
 
     // ensure everything is healthy
     if dstate, ok := nr.DeploymentCurrent.TaskGroups[groupName]; ok {
+        fmt.Printf("\nhealthy allocs %v desiredtotal: %v desired canaries: %v\n", dstate.HealthyAllocs, dstate.DesiredTotal, dstate.DesiredCanaries)
        if dstate.HealthyAllocs < dstate.DesiredTotal { // Make sure we have enough healthy allocs
             complete = false
         }
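
Once the count is known, canary candidates are drawn only from nodes that can actually run the task group. The sketch below illustrates that idea with assumed names; it counts selected canaries directly rather than indexing the eligible list the way the commit's loop does.

package main

import (
    "fmt"
    "slices"
)

// pickCanaryNodes returns up to want node IDs from eligible, skipping any
// node listed as infeasible for the task group.
func pickCanaryNodes(eligible, infeasible []string, want int) []string {
    canaries := make([]string, 0, want)
    for _, nodeID := range eligible {
        if len(canaries) == want {
            break
        }
        // infeasible nodes can never become canary candidates
        if slices.Contains(infeasible, nodeID) {
            continue
        }
        canaries = append(canaries, nodeID)
    }
    return canaries
}

func main() {
    eligible := []string{"node-a", "node-b", "node-c", "node-d"}
    infeasible := []string{"node-b"}
    fmt.Println(pickCanaryNodes(eligible, infeasible, 2)) // [node-a node-c]
}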

scheduler/scheduler_system.go

Lines changed: 27 additions & 12 deletions
@@ -45,9 +45,10 @@ type SystemScheduler struct {
     ctx   *feasible.EvalContext
     stack *feasible.SystemStack
 
-    nodes         []*structs.Node
-    notReadyNodes map[string]struct{}
-    nodesByDC     map[string]int
+    nodes           []*structs.Node
+    notReadyNodes   map[string]struct{}
+    nodesByDC       map[string]int
+    infeasibleNodes map[string][]string // maps task group names to node IDs that aren't feasible for these TGs
 
     deployment *structs.Deployment
 
@@ -278,7 +279,7 @@ func (s *SystemScheduler) computeJobAllocs() error {
 
     // Diff the required and existing allocations
     nr := reconciler.NewNodeReconciler(s.deployment)
-    r := nr.Compute(s.job, s.nodes, s.notReadyNodes, tainted, live, term,
+    r := nr.Compute(s.job, s.nodes, s.notReadyNodes, tainted, s.infeasibleNodes, live, term,
         s.planner.ServersMeetMinimumVersion(minVersionMaxClientDisconnect, true))
     if s.logger.IsDebug() {
         s.logger.Debug("reconciled current state with desired state", r.Fields()...)
@@ -447,9 +448,22 @@ func (s *SystemScheduler) computePlacements(place []reconciler.AllocTuple, exist
             s.planAnnotations.DesiredTGUpdates[tgName].Place -= 1
         }
 
-        if s.plan.Deployment != nil {
-            s.deployment.TaskGroups[tgName].DesiredTotal -= 1
+        // Store this node's ID as infeasible, so that when we need to make
+        // another deployment, we know to avoid it.
+        if s.infeasibleNodes == nil {
+            s.infeasibleNodes = make(map[string][]string)
         }
+        if s.infeasibleNodes[tgName] == nil {
+            s.infeasibleNodes[tgName] = make([]string, 0)
+        }
+        s.infeasibleNodes[tgName] = append(s.infeasibleNodes[tgName], node.ID)
+
+        // if s.plan.Deployment != nil {
+        //     s.deployment.TaskGroups[tgName].DesiredTotal -= 1
+        //     if s.deployment.TaskGroups[tgName].DesiredCanaries != 0 {
+        //         s.deployment.TaskGroups[tgName].DesiredCanaries -= 1
+        //     }
+        // }
 
         // Filtered nodes are not reported to users, just omitted from the job status
         continue
@@ -613,13 +627,14 @@ func (s *SystemScheduler) canHandle(trigger string) bool {
 }
 
 // evictAndPlace is used to mark allocations for evicts and add them to the
-// placement queue. evictAndPlace modifies the diffResult. It returns true if
-// the limit has been reached for any task group.
-func evictAndPlace(ctx feasible.Context, job *structs.Job, diff *reconciler.NodeReconcileResult, desc string) bool {
+// placement queue. evictAndPlace modifies the NodeReconcileResult. It returns
+// true if the limit has been reached for any task group.
+func evictAndPlace(ctx feasible.Context, job *structs.Job,
+    result *reconciler.NodeReconcileResult, desc string) bool {
 
     limits := map[string]int{} // per task group limits
     if !job.Stopped() {
-        jobLimit := len(diff.Update)
+        jobLimit := len(result.Update)
         if job.Update.MaxParallel > 0 {
             jobLimit = job.Update.MaxParallel
         }
@@ -633,10 +648,10 @@ func evictAndPlace(ctx feasible.Context, job *structs.Job, diff *reconciler.Node
     }
 
     limited := false
-    for _, a := range diff.Update {
+    for _, a := range result.Update {
         if limit := limits[a.Alloc.TaskGroup]; limit > 0 {
             ctx.Plan().AppendStoppedAlloc(a.Alloc, desc, "", "")
-            diff.Place = append(diff.Place, a)
+            result.Place = append(result.Place, a)
             if !a.Canary {
                 limits[a.Alloc.TaskGroup]--
             }
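
On the scheduler side the change is bookkeeping: when placement finds a node infeasible for a task group, its ID is remembered so the next reconciliation round can exclude it from the canary math. A minimal sketch of that pattern with assumed names is below; note that appending to a nil slice is safe in Go, so only the outer map needs lazy initialization.

package main

import "fmt"

// infeasibleTracker records, per task group, the node IDs that cannot run it.
type infeasibleTracker struct {
    byTaskGroup map[string][]string
}

// record stores nodeID as infeasible for the named task group, initializing
// the map on first use.
func (t *infeasibleTracker) record(tgName, nodeID string) {
    if t.byTaskGroup == nil {
        t.byTaskGroup = make(map[string][]string)
    }
    t.byTaskGroup[tgName] = append(t.byTaskGroup[tgName], nodeID)
}

func main() {
    var t infeasibleTracker
    t.record("cache", "node-b")
    t.record("cache", "node-d")
    fmt.Println(t.byTaskGroup) // map[cache:[node-b node-d]]
}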
