@@ -83,52 +83,57 @@ def __readNodeList():
8383 return nodes
8484
8585
def __formatPartitionLines(partition_name, nodes, node_names_line, partitions_line, slots=0):
    """Build the NodeName and PartitionName lines for one partition section.

    partition_name: name taken from the '#PARTITION:<name>' marker line.
    nodes: list of host names belonging to the partition (may be empty).
    node_names_line: the existing NodeName line (second line after the marker).
    partitions_line: the existing PartitionName line (third line after the marker).
    slots: value to write as Procs; 0 means "keep the value already on
        node_names_line".

    Returns a (node_line, partition_line) tuple of strings ready to be written.

    Both input lines are parsed positionally on single spaces: the second
    token of node_names_line is expected to be 'Procs=<n>' and the second
    token of partitions_line is expected to be 'Nodes=<list>'.
    """
    node_items = node_names_line.split(' ')
    if slots == 0:
        # No explicit slot count: reuse the Procs value already in the file.
        # This is resolved per call so one partition's value never leaks
        # into the next partition.
        slots = node_items[1].split('=')[1].strip()

    if nodes:
        # Everything after the Procs token (including the trailing newline)
        # is carried over unchanged.
        node_line = ('NodeName=' + ','.join(nodes) + ' Procs=%s' % slots
                     + ' ' + ' '.join(node_items[2:]))
    else:
        # Commented-out placeholder keeps the section three lines long.
        # It must end right at the newline: a stray leading space on the
        # following line would break the positional split on the next rewrite.
        node_line = '#NodeName= Procs=%s State=UNKNOWN\n' % slots

    partition_items = partitions_line.split(' ')
    # The first token (PartitionName=...) and everything after the old Nodes
    # token are preserved; only the Nodes list is regenerated.
    partition_line = (partition_items[0] + ' Nodes=dummy-' + partition_name + ','
                      + ','.join(nodes) + ' ' + ' '.join(partition_items[2:]))
    return node_line, partition_line


def __writeNodeList(node_list, slots=0):
    """Rewrite the partition sections of slurm.conf from node_list.

    node_list: dict mapping partition name -> list of host names.
    slots: Procs value to write on every regenerated NodeName line; when 0
        the Procs value currently present in each section is kept.

    The file is copied line by line into a temporary file. Each section that
    starts with a '#PARTITION:<name>' marker is expected to look like:

        #PARTITION:compute
        NodeName=dummy-compute Procs=2048 State=UNKNOWN
        NodeName=ip-172-31-6-43,ip-172-31-7-230 Procs=1 State=UNKNOWN
        PartitionName=compute Nodes=dummy-compute,ip-172-31-6-43,ip-172-31-7-230 Default=YES MaxTime=INFINITE State=UP

    The marker and the dummy node line are copied verbatim; the other two
    lines are regenerated. The temporary file then replaces the original
    and its mode is set to 0744.

    Raises KeyError if a partition in the file is missing from node_list,
    and StopIteration if a section is shorter than three lines.
    """
    _config = "/opt/slurm/etc/slurm.conf"
    fh, abs_path = mkstemp()
    # os.fdopen takes ownership of the mkstemp descriptor, so it is closed
    # when the with-block exits, including on error.
    with os.fdopen(fh, 'w') as new_file:
        with open(_config) as slurm_config:
            for line in slurm_config:
                if line.startswith('#PARTITION'):
                    partition_name = line.split(':')[1].rstrip()
                    new_file.write(line)
                    # Dummy node line is copied through untouched
                    new_file.write(next(slurm_config))
                    node_names_line = next(slurm_config)
                    partitions_line = next(slurm_config)
                    node_line, partition_line = __formatPartitionLines(
                        partition_name, node_list[partition_name],
                        node_names_line, partitions_line, slots)
                    new_file.write(node_line)
                    new_file.write(partition_line)
                else:
                    new_file.write(line)
    # Remove original file
    os.remove(_config)
    # Move new file
    move(abs_path, _config)
    # Update permissions on new file
    os.chmod(_config, 0o744)
117122
118123
def addHost(hostname, cluster_user, slots):
    """Add a host to the 'compute' partition and restart Slurm.

    hostname: node name to add to the 'compute' entry of the node list.
    cluster_user: passed to __restartSlurm for the restart on the new host.
    slots: forwarded to __writeNodeList as the slot count for the node line.

    Raises KeyError if the node list has no 'compute' partition.
    """
    log.info('Adding %s with %s slots ' % (hostname, slots))

    # Get the current node list
    node_list = __readNodeList()

    # Add new node. A host that is already listed is not appended again, so a
    # repeated add cannot put the same name twice into the node list; the
    # config is still rewritten and the daemons restarted below.
    compute_nodes = node_list['compute']
    if hostname not in compute_nodes:
        compute_nodes.append(hostname)
    __writeNodeList(node_list, slots)

    # Restart slurmctl locally
    restartMasterNodeSlurm()

    # Restart slurmctl on host
    __restartSlurm(hostname, cluster_user)
134139
0 commit comments