@@ -7,10 +7,10 @@ import (
77 "fmt"
88 "math"
99 "runtime/debug"
10+ "slices"
1011
1112 log "github.com/hashicorp/go-hclog"
1213 "github.com/hashicorp/go-memdb"
13- "github.com/hashicorp/go-set/v3"
1414 "github.com/hashicorp/nomad/helper/uuid"
1515 "github.com/hashicorp/nomad/nomad/structs"
1616 "github.com/hashicorp/nomad/scheduler/feasible"
@@ -52,7 +52,7 @@ type SystemScheduler struct {
5252 nodesByDC map [string ]int
5353
5454 deployment * structs.Deployment
55- feasibleNodesForTG map [string ]* set. Set [ string ] // used to track which nodes passed the feasibility check for TG
55+ feasibleNodesForTG map [string ][] * feasible. RankedNode // used to track which nodes passed the feasibility check for TG
5656
5757 limitReached bool
5858 nextEval * structs.Evaluation
@@ -279,13 +279,9 @@ func (s *SystemScheduler) computeJobAllocs() error {
279279 // Split out terminal allocations
280280 live , term := structs .SplitTerminalAllocs (allocs )
281281
282- // Find which of the eligible nodes are actually feasible for which TG. This way
283- // we get correct DesiredTotal and DesiredCanaries counts in the reconciler.
284- s .feasibleNodesForTG = s .findIgnorableNodes (live )
285-
286282 // Diff the required and existing allocations
287283 nr := reconciler .NewNodeReconciler (s .deployment )
288- reconciliationResult := nr .Compute (s .job , s .nodes , s .notReadyNodes , tainted , s . feasibleNodesForTG ,
284+ reconciliationResult := nr .Compute (s .job , s .nodes , s .notReadyNodes , tainted ,
289285 live , term , s .planner .ServersMeetMinimumVersion (minVersionMaxClientDisconnect , true ))
290286 if s .logger .IsDebug () {
291287 s .logger .Debug ("reconciled current state with desired state" , reconciliationResult .Fields ()... )
@@ -334,6 +330,17 @@ func (s *SystemScheduler) computeJobAllocs() error {
334330 DesiredTGUpdates : desiredUpdates (reconciliationResult , inplaceUpdates , destructiveUpdates ),
335331 }
336332
333+ // find feasible nodes for all the task groups
334+ s .feasibleNodesForTG = s .findFeasibleNodesForTG (reconciliationResult .Update , s .ctx , sstructs .StatusAllocUpdating )
335+
336+ fmt .Printf ("found the following feasible nodes:\n " )
337+ for k , v := range s .feasibleNodesForTG {
338+ for _ , node := range v {
339+ fmt .Printf ("tg %s has nodes: %v\n " , k , node .Node .ID )
340+ }
341+ fmt .Printf ("in total, tg %s has %d feasible nodes\n " , k , len (v ))
342+ }
343+
337344 // any further logic depends on whether we're canarying or not
338345 isCanarying := map [string ]bool {}
339346 if s .job != nil && s .deployment != nil {
@@ -357,9 +364,6 @@ func (s *SystemScheduler) computeJobAllocs() error {
357364 }
358365 }
359366
360- // find feasible nodes for each TG before we do any maxParallel evictions
361- s .findFeasibleNodesForTG (reconciliationResult .Update )
362-
363367 // Treat non in-place updates as an eviction and new placement, which will
364368 // be limited by max_parallel
365369 s .limitReached = evictAndPlace (s .ctx , s .job , reconciliationResult , sstructs .StatusAllocUpdating )
@@ -384,10 +388,6 @@ func (s *SystemScheduler) computeJobAllocs() error {
384388 return err
385389 }
386390
387- for k , v := range s .feasibleNodesForTG {
388- fmt .Printf ("found %d feasible nodes for tg %v: %v\n " , v .Size (), k , v .String ())
389- }
390-
391391 // if there is no deployment we're done at this point
392392 if s .deployment == nil {
393393 return nil
@@ -406,7 +406,7 @@ func (s *SystemScheduler) computeJobAllocs() error {
406406 continue
407407 }
408408
409- s .deployment .TaskGroups [tg .Name ].DesiredTotal = feasibleNodes . Size ( )
409+ s .deployment .TaskGroups [tg .Name ].DesiredTotal = len ( feasibleNodes )
410410
411411 // if this TG isn't canarying, we're done
412412 if ! isCanarying [tg .Name ] {
@@ -417,7 +417,7 @@ func (s *SystemScheduler) computeJobAllocs() error {
417417 // all eligible nodes. At this point we know which nodes are
418418 // feasible, so we evict unnedded canaries.
419419 placedCanaries , err := s .evictUnneededCanaries (
420- s .feasibleNodesForTG [tg .Name ]. Size ( ),
420+ len ( s .feasibleNodesForTG [tg .Name ]),
421421 tg .Update .Canary ,
422422 )
423423 if err != nil {
@@ -488,59 +488,52 @@ func mergeNodeFiltered(acc, curr *structs.AllocMetric) *structs.AllocMetric {
488488 return acc
489489}
490490
491- // findIgnorableNodes checks if there are allocations deployed to nodes that are
492- // from the same job version as ours, and can thus be omitted from feasibility
493- // checks
494- func (s * SystemScheduler ) findIgnorableNodes (allocs []* structs.Allocation ) map [string ]* set.Set [string ] {
495- if s .job == nil {
496- return nil
491+ func (s * SystemScheduler ) findFeasibleNodesForTG (updates []reconciler.AllocTuple , ctx feasible.Context , desc string ) map [string ][]* feasible.RankedNode {
492+ nodeByID := make (map [string ]* structs.Node , len (s .nodes ))
493+ for _ , node := range s .nodes {
494+ nodeByID [node .ID ] = node
497495 }
498496
499- feasibleNodes := make (map [string ]* set.Set [string ])
500-
501- for _ , a := range allocs {
502- if a .Job == nil {
503- continue
504- }
497+ feasibleNodes := make (map [string ][]* feasible.RankedNode )
505498
506- // if there's an existing alloc for this version of the job, there
507- // must've been an eval that checked its feasibility already
508- if a .Job .Version == s .job .Version {
509- // count this node as feasible
510- if feasibleNodes [a .TaskGroup ] == nil {
511- feasibleNodes [a .TaskGroup ] = set.New [string ](0 )
512- }
499+ nodes := make ([]* structs.Node , 1 )
500+ for _ , a := range updates {
513501
514- feasibleNodes [a .TaskGroup ].Insert (a .NodeID )
515- }
516- }
502+ // stop everything here
503+ ctx .Plan ().AppendStoppedAlloc (a .Alloc , desc , "" , "" )
517504
518- return feasibleNodes
519- }
520-
521- func (s * SystemScheduler ) findFeasibleNodesForTG (updates []reconciler.AllocTuple ) {
522- for _ , a := range updates {
523505 tgName := a .TaskGroup .Name
524- fmt .Printf ("looking for feasible node for tg %s\n " , tgName )
525506
526- s .stack .SetNodes (s .nodes )
507+ node , ok := nodeByID [a .Alloc .NodeID ]
508+ if ! ok {
509+ s .logger .Debug ("could not find node" , "node" , a .Alloc .NodeID )
510+ continue
511+ }
512+
513+ // Update the set of placement nodes
514+ nodes [0 ] = node
515+ s .stack .SetNodes (nodes )
527516
528517 // Attempt to match the task group
529518 option := s .stack .Select (a .TaskGroup , & feasible.SelectOptions {AllocName : a .Name })
530-
531519 if option == nil {
532- fmt .Printf ("no feasible node found for %v!\n " , a .Alloc )
533520 continue
534521 }
535522
536- fmt .Printf ("found feasible node %v for tg %v\n " , option .Node .ID , tgName )
537523 // count this node as feasible
538- if s .feasibleNodesForTG [tgName ] == nil {
539- s .feasibleNodesForTG [tgName ] = set.New [string ](0 )
524+ if feasibleNodes [tgName ] == nil {
525+ feasibleNodes [tgName ] = []* feasible.RankedNode {option }
526+ } else {
527+ if ! slices .ContainsFunc (
528+ feasibleNodes [tgName ],
529+ func (rn * feasible.RankedNode ) bool { return rn .Node .ID == option .Node .ID },
530+ ) {
531+ feasibleNodes [tgName ] = append (feasibleNodes [tgName ], option )
532+ }
540533 }
541-
542- s .feasibleNodesForTG [tgName ].Insert (option .Node .ID )
543534 }
535+
536+ return feasibleNodes
544537}
545538
546539// computePlacements computes placements for allocations
@@ -569,18 +562,26 @@ func (s *SystemScheduler) computePlacements(
569562 for _ , missing := range reconcilerResult .Place {
570563 tgName := missing .TaskGroup .Name
571564
565+ var option * feasible.RankedNode
566+
572567 node , ok := nodeByID [missing .Alloc .NodeID ]
573568 if ! ok {
574569 s .logger .Debug ("could not find node" , "node" , missing .Alloc .NodeID )
575570 continue
576571 }
577572
573+ // if we've already seen a feasible node for this tg, skip feasibility
574+ // checks for it
575+ // option = s.feasibleNodesForTG[tgName]
576+
577+ // if option == nil {
578578 // Update the set of placement nodes
579579 nodes [0 ] = node
580580 s .stack .SetNodes (nodes )
581581
582582 // Attempt to match the task group
583- option := s .stack .Select (missing .TaskGroup , & feasible.SelectOptions {AllocName : missing .Name })
583+ option = s .stack .Select (missing .TaskGroup , & feasible.SelectOptions {AllocName : missing .Name })
584+ // }
584585
585586 if option == nil {
586587 // If the task can't be placed on this node, update reporting data
@@ -727,13 +728,6 @@ func (s *SystemScheduler) computePlacements(
727728 }
728729
729730 s .plan .AppendAlloc (alloc , nil )
730-
731- // count this node as feasible
732- if s .feasibleNodesForTG [tgName ] == nil {
733- s .feasibleNodesForTG [tgName ] = set.New [string ](0 )
734- }
735-
736- s .feasibleNodesForTG [tgName ].Insert (alloc .NodeID )
737731 }
738732
739733 return nil
0 commit comments