|
18 | 18 | package org.apache.spark.sql.execution.metric |
19 | 19 |
|
20 | 20 | import java.io.File |
| 21 | +import java.util.regex.Pattern |
21 | 22 |
|
22 | 23 | import scala.collection.mutable.HashMap |
23 | 24 |
|
@@ -198,6 +199,51 @@ trait SQLMetricsTestUtils extends SQLTestUtils { |
198 | 199 | } |
199 | 200 | } |
200 | 201 | } |
| 202 | + |
/**
 * Split a metric string of the form "sum (min, med, max)" into its individual stat strings.
 *
 * The text before the first "(" becomes the first element, after dropping one leading newline
 * and one trailing space if present. The text between the first "(" and the first ")" is split
 * on ", " to give the remaining elements.
 *
 * @param metricStr metric string, e.g. "\n96.2 MB (32.1 MB, 32.1 MB, 32.1 MB)"
 * @return the stat strings in order of appearance, e.g. Seq("96.2 MB", "32.1 MB", ...)
 */
private def metricStats(metricStr: String): Seq[String] = {
  val openParen = metricStr.indexOf("(")
  val closeParen = metricStr.indexOf(")")
  // Everything ahead of the parenthesised group is the aggregated value.
  val total = metricStr.substring(0, openParen).stripPrefix("\n").stripSuffix(" ")
  val insideParens = metricStr.substring(openParen + 1, closeParen)
  val perTaskStats = insideParens.split(", ").toSeq
  total +: perTaskStats
}
| 209 | + |
/**
 * Parse a byte-size string such as "96.2 MB" into its numeric value and unit.
 *
 * The whole string must be a decimal number, a single space, and one of the units
 * EB, PB, TB, GB, MB, KB or B.
 *
 * @param str byte-size string to parse
 * @return a tuple of (value, unit), e.g. (96.2, "MB")
 * @throws NumberFormatException if `str` does not match the expected format
 */
private def stringToBytes(str: String): (Float, String) = {
  val byteStringPattern = Pattern.compile("([0-9]+(\\.[0-9]+)?) (EB|PB|TB|GB|MB|KB|B)")
  val parsed = byteStringPattern.matcher(str)
  if (!parsed.matches()) {
    throw new NumberFormatException("Failed to parse byte string: " + str)
  }
  // Group 1 is the number, group 3 the unit; group 2 is only the optional fraction.
  val value = parsed.group(1).toFloat
  val unit = parsed.group(3)
  (value, unit)
}
| 218 | + |
/**
 * Parse a duration string such as "2.0 ms" into its numeric value and unit.
 *
 * The whole string must be a decimal number, a single space, and one of the units
 * ms, s, m or h.
 *
 * @param str duration string to parse
 * @return a tuple of (value, unit), e.g. (2.0, "ms")
 * @throws NumberFormatException if `str` does not match the expected format
 */
private def stringToDuration(str: String): (Float, String) = {
  val durationPattern = Pattern.compile("([0-9]+(\\.[0-9]+)?) (ms|s|m|h)")
  val parsed = durationPattern.matcher(str)
  if (!parsed.matches()) {
    throw new NumberFormatException("Failed to parse time string: " + str)
  }
  // Group 1 is the number, group 3 the unit; group 2 is only the optional fraction.
  val value = parsed.group(1).toFloat
  val unit = parsed.group(3)
  (value, unit)
}
| 227 | + |
/**
 * Break a size metric string into its stats, each parsed as a (value, unit) tuple. The sum
 * comes first, followed by the parenthesised stats (min, med, max) in the order they appear.
 *
 * @param metricStr size metric string, e.g. "\n96.2 MB (32.1 MB, 32.1 MB, 32.1 MB)"
 * @return the parsed stats, e.g. ((96.2,MB), (32.1,MB), (32.1,MB), (32.1,MB))
 * @throws NumberFormatException if any stat is not a valid byte-size string
 */
protected def sizeMetricStats(metricStr: String): Seq[(Float, String)] = {
  for (stat <- metricStats(metricStr)) yield stringToBytes(stat)
}
| 237 | + |
/**
 * Break a timing metric string into its stats, each parsed as a (value, unit) tuple. The sum
 * comes first, followed by the parenthesised stats (min, med, max) in the order they appear.
 *
 * @param metricStr timing metric string, e.g. "\n2.0 ms (1.0 ms, 1.0 ms, 1.0 ms)"
 * @return the parsed stats, e.g. ((2.0,ms), (1.0,ms), (1.0,ms), (1.0,ms))
 * @throws NumberFormatException if any stat is not a valid duration string
 */
protected def timingMetricStats(metricStr: String): Seq[(Float, String)] = {
  for (stat <- metricStats(metricStr)) yield stringToDuration(stat)
}
201 | 247 | } |
202 | 248 |
|
203 | 249 |
|
|
0 commit comments