@@ -333,29 +333,6 @@ func (s *SystemScheduler) computeJobAllocs() error {
 	// find feasible nodes for all the task groups
 	s.feasibleNodesForTG = s.findFeasibleNodesForTG(reconciliationResult.Update)
 
-	// any further logic depends on whether we're canarying or not
-	isCanarying := map[string]bool{}
-	if s.job != nil && s.deployment != nil {
-		for _, tg := range s.job.TaskGroups {
-			dstate, ok := s.deployment.TaskGroups[tg.Name]
-			if !ok {
-				continue
-			}
-			// a system job is canarying if:
-			// - it has a non-empty update block (just a sanity check, all
-			// submitted jobs should have a non-empty update block as part of
-			// canonicalization)
-			// - canary parameter in the update block has to be positive
-			// - deployment has to be non-nil and it cannot have been promoted
-			// - this cannot be the initial job version
-			isCanarying[tg.Name] = !tg.Update.IsEmpty() &&
-				tg.Update.Canary > 0 &&
-				dstate != nil &&
-				!dstate.Promoted &&
-				s.job.Version != 0
-		}
-	}
-
 	// Treat non in-place updates as an eviction and new placement, which will
 	// be limited by max_parallel
 	s.limitReached = evictAndPlace(s.ctx, s.job, reconciliationResult, sstructs.StatusAllocUpdating)
@@ -398,45 +375,45 @@ func (s *SystemScheduler) computeJobAllocs() error {
 			continue
 		}
 
+		// we can set the desired total now, it's always the number of all
+		// feasible nodes
 		s.deployment.TaskGroups[tg.Name].DesiredTotal = len(feasibleNodes)
 
+		dstate, ok := s.deployment.TaskGroups[tg.Name]
+		if !ok {
+			continue
+		}
+		// a system job is canarying if:
+		// - it has a non-empty update block (just a sanity check, all
+		// submitted jobs should have a non-empty update block as part of
+		// canonicalization)
+		// - canary parameter in the update block has to be positive
+		// - deployment has to be non-nil and it cannot have been promoted
+		// - this cannot be the initial job version
+		isCanarying := !tg.Update.IsEmpty() &&
+			tg.Update.Canary > 0 &&
+			dstate != nil &&
+			!dstate.Promoted &&
+			s.job.Version != 0
+
 		// if this TG isn't canarying, we're done
-		if !isCanarying[tg.Name] {
+		if !isCanarying {
 			continue
 		}
 
+		// we can now also set the desired canaries: it's the tg.Update.Canary
+		// percent of all the feasible nodes, rounded up to the nearest int
+		requiredCanaries := int(math.Ceil(float64(tg.Update.Canary) * float64(len(feasibleNodes)) / 100))
+		s.deployment.TaskGroups[tg.Name].DesiredCanaries = requiredCanaries
+
 		// Initially, if the job requires canaries, we place all of them on
 		// all eligible nodes. At this point we know which nodes are
 		// feasible, so we evict unneeded canaries.
-		placedCanaries, err := s.evictUnneededCanaries(
-			len(s.feasibleNodesForTG[tg.Name]),
-			tg.Update.Canary,
-		)
+		placedCanaries, err := s.evictUnneededCanaries(requiredCanaries)
 		if err != nil {
 			return fmt.Errorf("failed to evict canaries for job '%s': %v", s.eval.JobID, err)
 		}
-
-		s.deployment.TaskGroups[tg.Name].DesiredCanaries = len(placedCanaries)
 		s.deployment.TaskGroups[tg.Name].PlacedCanaries = placedCanaries
-
-		for nodeID, allocs := range s.plan.NodeUpdate {
-			filtered := []*structs.Allocation{}
-			for _, a := range allocs {
-				if a.DesiredDescription == sstructs.StatusAllocNotNeeded {
-					filtered = append(filtered, a)
-					continue
-				}
-
-				// we only keep allocs that are in the plan.NodeAllocation for
-				// this node
-				if _, ok := s.plan.NodeAllocation[a.NodeID]; ok {
-					filtered = append(filtered, a)
-				}
-			}
-
-			s.plan.NodeUpdate[nodeID] = filtered
-		}
-
 	}
 
 	// check if the deployment is complete
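
The desired canary count set above is the update block's canary value read as a percentage of the feasible nodes, rounded up. A minimal standalone sketch of that arithmetic follows, with an illustrative helper and made-up inputs (not code from this change):

    package main

    import (
    	"fmt"
    	"math"
    )

    // requiredCanaries mirrors the expression above: canaryPercent of
    // feasibleNodes, rounded up, so a non-zero percentage on at least one
    // feasible node always yields at least one canary.
    func requiredCanaries(canaryPercent, feasibleNodes int) int {
    	return int(math.Ceil(float64(canaryPercent) * float64(feasibleNodes) / 100))
    }

    func main() {
    	fmt.Println(requiredCanaries(30, 7))  // ceil(2.1) -> 3
    	fmt.Println(requiredCanaries(30, 10)) // ceil(3.0) -> 3
    	fmt.Println(requiredCanaries(1, 200)) // ceil(2.0) -> 2
    	fmt.Println(requiredCanaries(0, 10))  // 0, no canaries requested
    }
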
@@ -543,38 +520,26 @@ func (s *SystemScheduler) computePlacements(
 		deploymentID = s.deployment.ID
 	}
 
-	nodes := make([]*structs.Node, 1)
 	for _, missing := range reconcilerResult.Place {
 		tgName := missing.TaskGroup.Name
 
-		var option *feasible.RankedNode
-
 		node, ok := nodeByID[missing.Alloc.NodeID]
 		if !ok {
 			s.logger.Debug("could not find node", "node", missing.Alloc.NodeID)
 			continue
 		}
 
-		// if we've already seen a feasible node for this tg, skip feasibility
-		// checks for it
+		// we've already performed the feasibility check for all the task
+		// groups and nodes, so look the node up
+		var option *feasible.RankedNode
 		optionsForTG := s.feasibleNodesForTG[tgName]
-		if existing := slices.IndexFunc(optionsForTG, func(rn *feasible.RankedNode) bool { return rn.Node == node }); existing != -1 {
+		if existing := slices.IndexFunc(
+			optionsForTG,
+			func(rn *feasible.RankedNode) bool { return rn.Node == node },
+		); existing != -1 {
 			option = optionsForTG[existing]
 		}
 
-		// if we couldn't find any in the feasible nodes map, perform a
-		// feasibility check for that node and TG
-		if option == nil {
-			// Update the set of placement nodes
-			nodes[0] = node
-			s.stack.SetNodes(nodes)
-
-			// Attempt to match the task group
-			option = s.stack.Select(missing.TaskGroup, &feasible.SelectOptions{AllocName: missing.Name})
-		}
-
-		// if we're still getting nothing, it means we don't have a feasible
-		// node
 		if option == nil {
 			// If the task can't be placed on this node, update reporting data
 			// and continue to short circuit the loop
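
The lookup above relies on feasibleNodesForTG being populated once, up front, so placement only needs to find the node in the precomputed list instead of re-running the selection stack per node. A rough sketch of that lookup pattern, using simplified stand-in types rather than the scheduler's real ones:

    package main

    import (
    	"fmt"
    	"slices"
    )

    type node struct{ ID string }
    type rankedNode struct{ Node *node }

    func main() {
    	// precomputed feasibility result for one task group
    	feasibleForTG := []*rankedNode{{Node: &node{ID: "n1"}}, {Node: &node{ID: "n3"}}}

    	// the node we are trying to place on
    	target := feasibleForTG[1].Node

    	// look the node up instead of running a per-node feasibility check
    	i := slices.IndexFunc(feasibleForTG, func(rn *rankedNode) bool { return rn.Node == target })
    	if i != -1 {
    		fmt.Println("feasible on", feasibleForTG[i].Node.ID) // feasible on n3
    	} else {
    		fmt.Println("node is not feasible for this task group")
    	}
    }
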
@@ -804,20 +769,17 @@ func evictAndPlace(ctx feasible.Context, job *structs.Job, reconciled *reconcile
 
 // evictUnneededCanaries checks how many canaries are needed against the number
 // of feasible nodes, and removes unnecessary placements from the plan.
-func (s *SystemScheduler) evictUnneededCanaries(feasibleNodes int, canary int) ([]string, error) {
+func (s *SystemScheduler) evictUnneededCanaries(requiredCanaries int) ([]string, error) {
 
 	desiredCanaries := []string{} // FIXME: make this better
 
-	// calculate how many canary placements we expect each task group to have:
-	// it should be the tg.update.canary percentage of eligible nodes, rounded
-	// up to the nearest integer
-	requiredCanaries := int(math.Ceil(float64(canary) * float64(feasibleNodes) / 100))
-
 	// no canaries to consider, quit early
 	if requiredCanaries == 0 {
 		return desiredCanaries, nil
 	}
 
+	canaryCounter := requiredCanaries
+
 	// iterate over node allocations to find canary allocs
 	for node, allocations := range s.plan.NodeAllocation {
 		n := 0
@@ -826,8 +788,8 @@ func (s *SystemScheduler) evictUnneededCanaries(feasibleNodes int, canary int) (
 				continue
 			}
 			if alloc.DeploymentStatus.Canary {
-				if requiredCanaries != 0 {
-					requiredCanaries -= 1
+				if canaryCounter != 0 {
+					canaryCounter -= 1
 
 					desiredCanaries = append(desiredCanaries, alloc.ID)
 					allocations[n] = alloc // we do this in order to avoid allocating another slice
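
The counter introduced above caps how many canary allocations survive in the plan: the first requiredCanaries canary allocs are kept and recorded, and any surplus canaries are filtered out. A simplified, self-contained sketch of that filtering, with a made-up alloc type and data rather than the scheduler's structs:

    package main

    import "fmt"

    type alloc struct {
    	ID     string
    	Canary bool
    }

    // keepCanaries keeps at most required canary allocs, filtering the slice in
    // place to avoid allocating a second one (the same trick as allocations[n] = alloc).
    func keepCanaries(allocs []*alloc, required int) (kept []*alloc, canaryIDs []string) {
    	counter := required
    	n := 0
    	for _, a := range allocs {
    		if a.Canary {
    			if counter == 0 {
    				continue // surplus canary: drop it from the plan
    			}
    			counter--
    			canaryIDs = append(canaryIDs, a.ID)
    		}
    		allocs[n] = a
    		n++
    	}
    	return allocs[:n], canaryIDs
    }

    func main() {
    	allocs := []*alloc{{"a", true}, {"b", true}, {"c", true}, {"d", false}}
    	kept, ids := keepCanaries(allocs, 2)
    	fmt.Println(len(kept), ids) // 3 [a b]
    }
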
@@ -844,11 +806,6 @@ func (s *SystemScheduler) evictUnneededCanaries(feasibleNodes int, canary int) (
 func (s *SystemScheduler) isDeploymentComplete(groupName string, buckets *reconciler.NodeReconcileResult, isCanarying bool) bool {
 	complete := len(buckets.Place)+len(buckets.Migrate)+len(buckets.Update) == 0
 
-	// fmt.Printf("complete? %v\n", complete)
-	// fmt.Printf("buckets.Place: %v buckets.Migrate: %v buckets.Update: %v\n", len(buckets.Place), len(buckets.Migrate), len(buckets.Update))
-	// fmt.Printf("s.deployment == nil? %v\n", s.deployment == nil)
-	// fmt.Printf("isCanarying? %v\n", isCanarying)
-
 	if !complete || s.deployment == nil || isCanarying {
 		return false
 	}