@@ -32,13 +32,14 @@ func NewNodeReconciler(deployment *structs.Deployment) *NodeReconciler {
3232 }
3333}
3434
35- // Compute is like diffSystemAllocsForNode however, the allocations in the
36- // diffResult contain the specific nodeID they should be allocated on.
35+ // Compute is like computeCanaryNodes; however, the allocations in the
36+ // NodeReconcileResult contain the specific node ID they should be allocated on.
3737func (nr * NodeReconciler ) Compute (
3838 job * structs.Job , // jobs whose allocations are going to be diff-ed
3939 readyNodes []* structs.Node , // list of nodes in the ready state
4040 notReadyNodes map [string ]struct {}, // list of nodes in DC but not ready, e.g. draining
4141 taintedNodes map [string ]* structs.Node , // nodes which are down or drain mode (by node id)
42+ infeasibleNodes map [string ][]string , // maps task groups to node IDs that are not feasible for them
4243 live []* structs.Allocation , // non-terminal allocations
4344 terminal structs.TerminalByNodeByName , // latest terminal allocations (by node id)
4445 serverSupportsDisconnectedClients bool , // flag indicating whether to apply disconnected client logic
@@ -64,7 +65,7 @@ func (nr *NodeReconciler) Compute(
6465 // Canary deployments deploy to the TaskGroup.UpdateStrategy.Canary
6566 // percentage of eligible nodes, so we create a mapping of task group name
6667 // to a list of nodes that canaries should be placed on.
67- canaryNodes , canariesPerTG := nr .computeCanaryNodes (required , nodeAllocs , terminal , eligibleNodes )
68+ canaryNodes , canariesPerTG := nr .computeCanaryNodes (required , nodeAllocs , terminal , eligibleNodes , infeasibleNodes )
6869
6970 compatHadExistingDeployment := nr .DeploymentCurrent != nil
7071
@@ -102,7 +103,7 @@ func (nr *NodeReconciler) Compute(
102103// many total canaries are to be placed for a TG.
103104func (nr * NodeReconciler ) computeCanaryNodes (required map [string ]* structs.TaskGroup ,
104105 liveAllocs map [string ][]* structs.Allocation , terminalAllocs structs.TerminalByNodeByName ,
105- eligibleNodes map [string ]* structs.Node ) (map [string ]map [string ]bool , map [string ]int ) {
106+ eligibleNodes map [string ]* structs.Node , infeasibleNodes map [ string ][] string ) (map [string ]map [string ]bool , map [string ]int ) {
106107
107108 canaryNodes := map [string ]map [string ]bool {}
108109 eligibleNodesList := slices .Collect (maps .Values (eligibleNodes ))
@@ -114,7 +115,17 @@ func (nr *NodeReconciler) computeCanaryNodes(required map[string]*structs.TaskGr
114115 }
115116
116117 // round up to the nearest integer
117- numberOfCanaryNodes := int (math .Ceil (float64 (tg .Update .Canary ) * float64 (len (eligibleNodes )) / 100 ))
118+ numberOfCanaryNodes := int (math .Ceil (float64 (tg .Update .Canary )* float64 (len (eligibleNodes ))/ 100 )) - len (infeasibleNodes [tg .Name ])
119+
120+ // Check for a current deployment (the logic below is currently disabled):
121+ // the desired number of canaries may have to be reduced due to infeasible nodes.
122+ // if nr.DeploymentCurrent != nil {
123+ // if dstate, ok := nr.DeploymentCurrent.TaskGroups[tg.Name]; ok {
124+ // numberOfCanaryNodes = dstate.DesiredCanaries
125+ // fmt.Printf("existing deploy, setting number of canary nodes to %v\n", dstate.DesiredCanaries)
126+ // }
127+ // }
128+
118129 canariesPerTG [tg .Name ] = numberOfCanaryNodes
119130
120131 // check if there are any live allocations on any nodes that are/were
@@ -135,6 +146,10 @@ func (nr *NodeReconciler) computeCanaryNodes(required map[string]*structs.TaskGr
135146 }
136147
137148 for i , n := range eligibleNodesList {
149+ // infeasible nodes can never become canary candidates
150+ if slices .Contains (infeasibleNodes [tg .Name ], n .ID ) {
151+ continue
152+ }
138153 if i > numberOfCanaryNodes - 1 {
139154 break
140155 }
@@ -441,10 +456,10 @@ func (nr *NodeReconciler) computeForNode(
441456 dstate .ProgressDeadline = tg .Update .ProgressDeadline
442457 }
443458 dstate .DesiredTotal = len (eligibleNodes )
444- }
445459
446- if isCanarying [tg .Name ] && ! dstate .Promoted {
447- dstate .DesiredCanaries = canariesPerTG [tg .Name ]
460+ if isCanarying [tg .Name ] && ! dstate .Promoted {
461+ dstate .DesiredCanaries = canariesPerTG [tg .Name ]
462+ }
448463 }
449464
450465 // Check for an existing allocation
@@ -587,14 +602,21 @@ func (nr *NodeReconciler) createDeployment(job *structs.Job, tg *structs.TaskGro
587602}
588603
589604func (nr * NodeReconciler ) isDeploymentComplete (groupName string , buckets * NodeReconcileResult , isCanarying bool ) bool {
605+ fmt .Printf ("\n ===========\n " )
606+ fmt .Println ("isDeploymentComplete call" )
590607 complete := len (buckets .Place )+ len (buckets .Migrate )+ len (buckets .Update ) == 0
591608
609+ fmt .Printf ("\n is complete? %v buckets.Place: %v buckets.Update: %v\n " , complete , len (buckets .Place ), len (buckets .Update ))
610+ fmt .Printf ("\n nr.deploymentCurrent == nil? %v isCanarying?: %v\n " , nr .DeploymentCurrent == nil , isCanarying )
611+ fmt .Println ("===========" )
612+
592613 if ! complete || nr .DeploymentCurrent == nil || isCanarying {
593614 return false
594615 }
595616
596617 // ensure everything is healthy
597618 if dstate , ok := nr .DeploymentCurrent .TaskGroups [groupName ]; ok {
619+ fmt .Printf ("\n healthy allocs %v desiredtotal: %v desired canaries: %v\n " , dstate .HealthyAllocs , dstate .DesiredTotal , dstate .DesiredCanaries )
598620 if dstate .HealthyAllocs < dstate .DesiredTotal { // Make sure we have enough healthy allocs
599621 complete = false
600622 }
0 commit comments