2 changes: 1 addition & 1 deletion setup.py
@@ -35,7 +35,7 @@
package_dir={'': 'src'},
packages=find_packages(where='src'),
package_data={
'supremm': ['assets/modw_supremm.sql', 'assets/mongo_setup.js', '*.pxd', '*.pyx'],
'supremm': ['assets/modw_supremm.sql', 'assets/*schema.json', '*.pxd', '*.pyx'],
'supremm.datasource.pcp.pcpcinterface': ['*.pxd', '*.pyx']
},
data_files=[
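The glob above packages schema.json and timeseries_schema.json as supremm package data in place of the old mongo_setup.js script. A minimal sketch (not part of this diff) of how the installed assets could be read back at runtime with importlib.resources; the helper name load_schema_asset and the Python 3.9+ files() API are illustrative assumptions, not code from this PR:

# Hypothetical helper: read a schema asset shipped via the package_data glob above.
import importlib.resources
import json

def load_schema_asset(filename):
    """Return the parsed JSON document for a file under supremm/assets."""
    resource = importlib.resources.files("supremm") / "assets" / filename
    with resource.open("r", encoding="utf-8") as fh:
        return json.load(fh)

summary_schema = load_schema_asset("schema.json")
timeseries_schema = load_schema_asset("timeseries_schema.json")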
183 changes: 45 additions & 138 deletions src/supremm/assets/mongo_setup.js → src/supremm/assets/schema.json
@@ -1,105 +1,22 @@
var sdef = {
"_id": "timeseries-4",
"type": "timeseries",
"applies_to_version": 4,
"metrics": {
"cpuuser": {
"units": "CPU %",
"description": "CPU User",
"help": "The average percentage of time spent in CPU user mode. The average is computed over each time interval."
},
"membw": {
"units": "GB/s",
"description": "Memory bandwidth",
"help": "The total rate of data transferred to and from main memory. The rate is computed over each time interval. This value is obtained from the hardware counters."
},
"simdins": {
"units": "insts/s",
"description": "SIMD instructions",
"help": "The total rate of floating point SIMD instructions reported by the hardware performance counters on the CPU cores on which the job ran. Note that the meaning of this value is hardware-specific so the data should not in general be compared between HPC resources that have different hardware architectures."
},
"sveins": {
"units": "insts/s",
"description": "SVE instructions",
"help": "The total rate of SVE instructions reported by the hardware performance counters on the CPU cores on which the job ran. The number of floating point operations per SVE instruction retired can vary."
},
"gpu_usage": {
"units": "GPU %",
"description": "GPU utilization %",
"help": "The average percentage of time spent with the GPU active. The average is computed over each time interval."
},
"clktks": {
"units": "insts/s",
"description": "Clock Ticks",
"help": "The total rate of clock ticks reported by the hardware performance counters on the CPU cores on which the job ran. Note that the meaning of this value is hardware-specific so the data should not in general be compared between HPC resources that have different hardware architectures."
},
"memused_minus_diskcache": {
"units": "GB",
"description": "Node Memory RSS",
"help": "The total physical memory used by the operating system excluding memory used for caches. This value includes the contribution for <em>all</em> processes including system daemons and all running HPC jobs but does not include the physical memory used by the kernel page and SLAB caches. For HPC resources that use a Linux-based operating system this value is obtained from the <code>meminfo</code> file in sysfs for each numa node (i.e. <code>/sys/devices/system/node/nodeX/meminfo</code>)"
},
"corepower": {
"units": "Watts",
"help": "The power consumption obtained from hardware measurements built into the CPU or motherboard. This will not include the contribution from other hardware components on the compue node such as periperal cards and power supplies.",
"description": "CPU Power"
},
"power": {
"units": "Watts",
"help": "The power consumption of the compute nodes obtained from the IPMI DCMI interface. This does not include any associated power usage from other datacenter components such as network switches, parallel filesystems and cooling.",
"description": "Node Power"
},
"memused": {
"units": "GB",
"description": "Total Node Memory",
"help": "The total physical memory used by the operating system. For HPC resources that use a Linux-based operating system this value is obtained from the <code>meminfo</code> file in sysfs for each numa node (i.e. <code>/sys/devices/system/node/nodeX/meminfo</code>)"
},
"process_mem_usage": {
"units": "GB",
"description": "Total CGroup Memory",
"help": "The total amount of memory used in the memory cgroup that contained the job. The value is obtained from the kernel cgroup metrics."
},
"ib_lnet": {
"units": "MB/s",
"description": "Interconnect MPI traffic",
"help": "The total rate of data transferred over the parallel interconnect. The rate is computed over each time interval and is the sum of the data sent and received by each node. Some HPC resources also use the interconnect for parallel filesystem traffic; this filesystem traffic is not included in these data."
},
"lnet": {
"units": "MB/s",
"description": "Parallel Filesystem traffic",
"help": "The total rate of data transferred to and from the parallel filesystem. The rate is computed over each time interval and is the sum of data sent and received by each node."
},
"block": {
"units": "GB/s",
"description": "Block Filesystem traffic",
"help": "The total rate of data transferred to and from the block devices on each node. The rate is computed over each time interval and is the sum of data read and written."
},
"nfs": {
"units": "MB/s",
"description": "NFS Filesystem traffic",
"help": "The total rate of data transferred to and from the parallel filesystem over NFS mounts. The rate is computed over each time interval and is the sum of data sent and received by each node."
}
}
};

var summarydef = {
{
"summary_version": "summary-1.0.6",
"_id": "summary-1.0.6",
"definitions": {
"lnet": {
"documentation": "",
"type": "",
"documentation": "",
"type": "",
"unit": ""
},
},
"catastrophe": {
"documentation": "",
"type": "",
"documentation": "",
"type": "",
"unit": ""
},
},
"infiniband": {
"documentation": "",
"type": "",
"documentation": "",
"type": "",
"unit": ""
},
},
"cpuperf": {
"cpiref": {
"documentation": "The average clock ticks per instruction for each core.",
@@ -116,7 +33,7 @@ var summarydef = {
"type": "instant",
"unit": "op"
}
},
"load1": {
"max": {
"documentation": "The maximum value of the 1-second 'load average' during the job.",
@@ -152,7 +69,7 @@ var summarydef = {
"unit": "byte"
}
}
},
"gpfs": {
"*": {
"read": {
@@ -176,7 +93,7 @@ var summarydef = {
"unit": "byte"
}
}
},
"nodememory": {
"free": {
"documentation": "The average amount of free memory per node for the job. The value is obtained from /proc/meminfo. The average is calculated as the mean value of each memory usage measurement.",
@@ -207,8 +124,8 @@ var summarydef = {
"process_memory": {
"usage": {
"avg": {
"documentation": "The average amount of memory used in the memory cgroup that contained the job. The value is obtained from the kernel cgroup metrics. The average is calculated as the mean value of each memory usage measurement.",
"type": "instant",
"documentation": "The average amount of memory used in the memory cgroup that contained the job. The value is obtained from the kernel cgroup metrics. The average is calculated as the mean value of each memory usage measurement.",
"type": "instant",
"unit": "byte"
},
"max": {
@@ -219,8 +136,8 @@ var summarydef = {
},
"usageratio": {
"avg": {
"documentation": "The average ratio of memory used to the memory limit for the processes in the memory cgroup that contained the job. The value is obtained from the kernel cgroup metrics.",
"type": "instant",
"documentation": "The average ratio of memory used to the memory limit for the processes in the memory cgroup that contained the job. The value is obtained from the kernel cgroup metrics.",
"type": "instant",
"unit": "ratio"
},
"max": {
@@ -234,65 +151,65 @@ var summarydef = {
"type": "instant",
"unit": "byte"
}
},
"nfs": {
"documentation": "",
"type": "",
"unit": ""
},
"simdins": {
"documentation": "",
"type": "",
"documentation": "",
"type": "",
"unit": ""
},
},
"uncperf": {
"membw": {
"documentation": "The average amount of data transferred to and from main memory per node.",
"type": "instant",
"unit": "byte"
}
},
"memory": {
"used": {
"documentation": "The mean memory usage during the job. This value is the usage per core. This value includes the OS buffers and kernel slab cache.",
"type": "",
"documentation": "The mean memory usage during the job. This value is the usage per core. This value includes the OS buffers and kernel slab cache.",
"type": "",
"unit": "kilobyte"
},
"used_minus_cache": {
"documentation": "The mean memory usage during the job. This value is the usage per core.",
"type": "",
"documentation": "The mean memory usage during the job. This value is the usage per core.",
"type": "",
"unit": "kilobyte"
}
},
},
"gpu": {
"*": {
"gpuactive": {
"documentation": "The average GPU usage.",
"type": "instant",
"documentation": "The average GPU usage.",
"type": "instant",
"unit": "%"
},
"gpuactivemax": {
"documentation": "The peak GPU usage.",
"type": "instant",
"documentation": "The peak GPU usage.",
"type": "instant",
"unit": "%"
},
"memused": {
"documentation": "The average memory usage per GPU.",
"type": "instant",
"documentation": "The average memory usage per GPU.",
"type": "instant",
"unit": "byte"
},
"memusedmax": {
"documentation": "The peak memory usage for each GPU.",
"type": "instant",
"documentation": "The peak memory usage for each GPU.",
"type": "instant",
"unit": "byte"
}
}
},
},
"proc": {
"documentation": "",
"type": "",
"documentation": "",
"type": "",
"unit": ""
},
},
"cpu": {
"jobcpus": {
"idle": {
@@ -399,7 +316,7 @@ var summarydef = {
"unit": "byte"
}
}
},
"gemini": {
"totalinput": {
"documentation": "",
@@ -436,15 +353,5 @@ var summarydef = {
"type": "",
"unit": ""
}
}
};

db = db.getSiblingDB("supremm");
var version = parseFloat(db.version());
if (version < 5) {
db.schema.update({_id: sdef._id}, sdef, {upsert: true});
db.schema.update({_id: summarydef._id}, summarydef, {upsert: true});
} else {
db.schema.updateOne({_id: sdef._id}, {$set: sdef}, {upsert: true});
db.schema.updateOne({_id: summarydef._id}, {$set: summarydef}, {upsert: true});
}
}
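The removed mongo_setup.js block above upserted both schema documents into the schema collection of the supremm database from the mongo shell. A hedged pymongo sketch of equivalent loading logic, mirroring the removed updateOne(..., {$set: ...}, {upsert: true}) calls; the connection URI, function name, and file paths are placeholders, and only the database name, collection name, and upsert-by-_id behaviour come from the removed script:

# Sketch only: replicate the upsert behaviour of the removed mongo_setup.js with pymongo.
import json
from pymongo import MongoClient

def upsert_schema_documents(uri, schema_paths):
    """Upsert each schema JSON document into supremm.schema, keyed by its _id."""
    client = MongoClient(uri)
    schema_collection = client["supremm"]["schema"]  # db.getSiblingDB("supremm").schema
    for path in schema_paths:
        with open(path, "r", encoding="utf-8") as fh:
            doc = json.load(fh)
        # Same effect as db.schema.updateOne({_id: ...}, {$set: doc}, {upsert: true})
        schema_collection.update_one({"_id": doc["_id"]}, {"$set": doc}, upsert=True)

upsert_schema_documents(
    "mongodb://localhost:27017",  # placeholder URI
    ["src/supremm/assets/schema.json", "src/supremm/assets/timeseries_schema.json"],
)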
82 changes: 82 additions & 0 deletions src/supremm/assets/timeseries_schema.json
@@ -0,0 +1,82 @@
{
"_id": "timeseries-4",
"type": "timeseries",
"applies_to_version": 4,
"metrics": {
"cpuuser": {
"units": "CPU %",
"description": "CPU User",
"help": "The average percentage of time spent in CPU user mode. The average is computed over each time interval."
},
"membw": {
"units": "GB/s",
"description": "Memory bandwidth",
"help": "The total rate of data transferred to and from main memory. The rate is computed over each time interval. This value is obtained from the hardware counters."
},
"simdins": {
"units": "insts/s",
"description": "SIMD instructions",
"help": "The total rate of floating point SIMD instructions reported by the hardware performance counters on the CPU cores on which the job ran. Note that the meaning of this value is hardware-specific so the data should not in general be compared between HPC resources that have different hardware architectures."
},
"sveins": {
"units": "insts/s",
"description": "SVE instructions",
"help": "The total rate of SVE instructions reported by the hardware performance counters on the CPU cores on which the job ran. The number of floating point operations per SVE instruction retired can vary."
},
"gpu_usage": {
"units": "GPU %",
"description": "GPU utilization %",
"help": "The average percentage of time spent with the GPU active. The average is computed over each time interval."
},
"clktks": {
"units": "insts/s",
"description": "Clock Ticks",
"help": "The total rate of clock ticks reported by the hardware performance counters on the CPU cores on which the job ran. Note that the meaning of this value is hardware-specific so the data should not in general be compared between HPC resources that have different hardware architectures."
},
"memused_minus_diskcache": {
"units": "GB",
"description": "Node Memory RSS",
"help": "The total physical memory used by the operating system excluding memory used for caches. This value includes the contribution for <em>all</em> processes including system daemons and all running HPC jobs but does not include the physical memory used by the kernel page and SLAB caches. For HPC resources that use a Linux-based operating system this value is obtained from the <code>meminfo</code> file in sysfs for each numa node (i.e. <code>/sys/devices/system/node/nodeX/meminfo</code>)"
},
"corepower": {
"units": "Watts",
"help": "The power consumption obtained from hardware measurements built into the CPU or motherboard. This will not include the contribution from other hardware components on the compue node such as periperal cards and power supplies.",
"description": "CPU Power"
},
"power": {
"units": "Watts",
"help": "The power consumption of the compute nodes obtained from the IPMI DCMI interface. This does not include any associated power usage from other datacenter components such as network switches, parallel filesystems and cooling.",
"description": "Node Power"
},
"memused": {
"units": "GB",
"description": "Total Node Memory",
"help": "The total physical memory used by the operating system. For HPC resources that use a Linux-based operating system this value is obtained from the <code>meminfo</code> file in sysfs for each numa node (i.e. <code>/sys/devices/system/node/nodeX/meminfo</code>)"
},
"process_mem_usage": {
"units": "GB",
"description": "Total CGroup Memory",
"help": "The total amount of memory used in the memory cgroup that contained the job. The value is obtained from the kernel cgroup metrics."
},
"ib_lnet": {
"units": "MB/s",
"description": "Interconnect MPI traffic",
"help": "The total rate of data transferred over the parallel interconnect. The rate is computed over each time interval and is the sum of the data sent and received by each node. Some HPC resources also use the interconnect for parallel filesystem traffic; this filesystem traffic is not included in these data."
},
"lnet": {
"units": "MB/s",
"description": "Parallel Filesystem traffic",
"help": "The total rate of data transferred to and from the parallel filesystem. The rate is computed over each time interval and is the sum of data sent and received by each node."
},
"block": {
"units": "GB/s",
"description": "Block Filesystem traffic",
"help": "The total rate of data transferred to and from the block devices on each node. The rate is computed over each time interval and is the sum of data read and written."
},
"nfs": {
"units": "MB/s",
"description": "NFS Filesystem traffic",
"help": "The total rate of data transferred to and from the parallel filesystem over NFS mounts. The rate is computed over each time interval and is the sum of data sent and received by each node."
}
}
}