Skip to content

Commit 919b2cd

Browse files
committed
GPUEff: change calculation to properly average over all job steps
1 parent 2be07a3 commit 919b2cd

File tree

3 files changed

+14
-14
lines changed

3 files changed

+14
-14
lines changed

README.rst

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -285,16 +285,17 @@ them. For other columns, check ``man sacct``.
285285

286286
* ``ReqGPU``: Number of GPUs requested, from ``ReqTRES[gres/gpu]``.
287287

288-
* ``GpuMem``: From ``TRESUsageInAve[gres/gpumem]``
288+
* ``GpuMem``: From ``TRESUsageInTot[gres/gpumem]``
289289

290-
* ``GpuUtil``: From ``TRESUsageInAve[gres/gpuutil]`` (normalized to
290+
* ``GpuUtil``: From ``TRESUsageInTot[gres/gpuutil]`` (normalized to
291291
fraction 0.0-ngpus).
292292

293293
* ``NGpus``: Number of GPUs from ``AllocTRES[gres/gpu]``.
294294
Should be the same as ``ReqGPU``, but who knows.
295295

296-
* ``GpuUtilTot``, ``GpuMemTot``: like above but using the
297-
``TRESUsageInTot`` sacct field.
296+
* ``GpuUtilTot``, ``GpuMemTot``: Same as the columns above. (Previously
297+
the columns above were computed from ``TRESUsageInAve``; these columns
298+
are kept for the time being for backwards compatibility.)
298299

299300
* ``GpuEff``: From ``TRESUsageInTot[gres/gpuutil]`` / (100 *
300301
``AllocTRES[gres/gpu]``).

slurm2sql.py

Lines changed: 6 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -668,8 +668,8 @@ def calc(row):
668668
#'_NGPU': slurmGPUCount, # Number of GPUs, extracted from comment field
669669
'_NGpus': ExtractField('NGpus', 'AllocTRES', 'gres/gpu', float_metric),
670670
'_GpuType': slurmGPUType, # gres/gpu:TYPE= from AllocTres
671-
'_GpuUtil': ExtractField('GpuUtil', 'TRESUsageInAve', 'gres/gpuutil', float_metric, wrap=lambda x: x/100.), # can be >100 for multi-GPU.
672-
'_GpuMem': ExtractField('GpuMem2', 'TRESUsageInAve', 'gres/gpumem', float_metric),
671+
'_GpuUtil': ExtractField('GpuUtil', 'TRESUsageInTot', 'gres/gpuutil', float_metric, wrap=lambda x: x/100.), # can be >100 for multi-GPU.
672+
'_GpuMem': ExtractField('GpuMem2', 'TRESUsageInTot', 'gres/gpumem', float_metric),
673673
'_GpuUtilTot': ExtractField('GpuUtilTot', 'TRESUsageInTot', 'gres/gpuutil', float_metric),
674674
'_GpuMemTot': ExtractField('GpuMemTot', 'TRESUsageInTot', 'gres/gpumem', float_metric),
675675
}
@@ -940,7 +940,7 @@ def infer_type(cd):
940940
'ReqTRES, '
941941
'max(Elapsed) AS Elapsed, '
942942
'max(NCPUS) AS NCPUS, '
943-
'sum(totalcpu)/max(cputime) AS CPUeff, ' # highest TotalCPU is for the whole allocation
943+
'sum(totalcpu)/max(cputime) AS CPUeff, '
944944
'max(cputime) AS cpu_s_reserved, '
945945
'sum(totalcpu) AS cpu_s_used, '
946946
'max(ReqMemNode) AS MemReq, '
@@ -952,10 +952,9 @@ def infer_type(cd):
952952
'max(NGpus) AS NGpus, '
953953
'max(GPUType) AS GPUType, '
954954
'max(NGpus)*max(Elapsed) AS gpu_s_reserved, '
955-
'max(NGpus)*max(Elapsed)*max(GPUutil) AS gpu_s_used, '
956-
#'max(GPUutil)/max(NGpus) AS GPUeff, ' # Individual job with highest use (check this)
957-
'max(GPUEff) AS GPUeff, ' # Individual job with highest use (check this)
958-
'max(GPUMem) AS GPUMem, '
955+
'max(NGpus)*max(Elapsed)*max(GpuUtil) AS gpu_s_used, '
956+
'sum(GpuUtil*Elapsed)/max(Ngpus*Elapsed) AS GpuEff, '
957+
'max(GpuMem) AS GpuMem, '
959958
'MaxDiskRead, '
960959
'MaxDiskWrite, '
961960
'sum(TotDiskRead) as TotDiskRead, '

test.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,7 @@ def data3(slurm_version_2011):
7777

7878
def csvdata(data):
7979
"""Convert string CSV to a reader for s2s"""
80-
reader = csv.DictReader(StringIO(data.strip()))
80+
reader = csv.DictReader(StringIO(data.strip()), skipinitialspace=True)
8181
return reader
8282

8383
def fetch(db, jobid, field, table='slurm'):
@@ -192,8 +192,8 @@ def test_memeff(db):
192192

193193
def test_gpueff(db):
194194
data = """
195-
JobID,AllocTRES,TRESUsageInTot
196-
1,gres/gpu=1,gres/gpuutil=23
195+
JobID,Elapsed,AllocTRES, TRESUsageInTot
196+
1, 1:00, gres/gpu=1, gres/gpuutil=23
197197
"""
198198
slurm2sql.slurm2sql(db, [], csv_input=csvdata(data))
199199
print(db.execute('select * from eff;').fetchall())

0 commit comments

Comments
 (0)