@@ -437,53 +437,39 @@ def get_emf_unit_for_fc_metrics(full_key):
437437
438438def flush_fc_metrics_to_cw (fc_metrics , metrics ):
439439 """
440- Flush Firecracker metrics to CloudWatch
441- Use an existing metrics logger with existing dimensions so
442- that its easier to corelate the metrics with the test calling it.
443- Add a prefix "fc_metrics." to differentiate these metrics, this
444- also helps to avoid using this metrics in AB tests.
440+ Flush Firecracker metrics to CloudWatch. Use an existing metrics logger with existing dimensions so that it is
441+ easier to correlate the metrics with the test calling it. Add a prefix "fc_metrics." to differentiate these metrics,
442+ this also helps to avoid using this metrics in A/B tests.
445443 NOTE:
446- There are metrics with keywords "fail", "err",
447- "num_faults", "panic" in their name and represent
448- some kind of failure in Firecracker.
449- This function `does not` assert on these failure metrics
450- since some tests might not want to assert on them while
451- some tests might want to assert on some but not others.
444+ There are metrics with keywords "fail", "err", "num_faults", "panic" in their name and represent some kind of
445+ failure in Firecracker. We assert that all these are zero, to catch potentially silent failure modes. This
446+ means the FcMonitor cannot be used in negative tests that might cause such metrics to be emitted.
452447 """
453448
454- def walk_key (full_key , keys ):
455- for key , value in keys .items ():
456- final_full_key = full_key + "." + key
457- if isinstance (value , dict ):
458- walk_key (final_full_key , value )
459- else :
460- # values are 0 when:
461- # - there is no update
462- # - device is not used
463- # - SharedIncMetric reset to 0 on flush so if
464- # there is no change metric the values remain 0.
465- # We can save the amount of bytes we export to
466- # CloudWatch in these cases.
467- # however it is difficult to differentiate if a 0
468- # should be skipped or upload because it could be
469- # an expected value in some cases so we upload
470- # all the metrics even if data is 0.
471- unit = get_emf_unit_for_fc_metrics (final_full_key )
472- metrics .put_metric (f"fc_metrics.{ final_full_key } " , value , unit = unit )
473-
474- # List of SharedStoreMetric that once updated have the same value thoughout the life of vm
475- metrics_to_export_once = {
476- "api_server" ,
477- "latencies_us" ,
478- }
479- skip = set ()
480- for group , keys in fc_metrics .items ():
481- if group == "utc_timestamp_ms" :
449+ # Pre-order tree traversal to convert a tree into its list of paths with dot separate segments
450+ def flatten_dict (node , prefix : str ):
451+ if not isinstance (node , dict ):
452+ return {prefix : node }
453+
454+ result = {}
455+ for child_metric_name , child_metrics in node .items ():
456+ result .update (flatten_dict (child_metrics , f"{ prefix } .{ child_metric_name } " ))
457+ return result
458+
459+ flattened_metrics = flatten_dict (fc_metrics , "fc_metrics" )
460+
461+ for key , value in flattened_metrics .items ():
462+ if ".utc_timestamp_ms." in key :
482463 continue
483- if group not in skip :
484- walk_key (group , keys )
485- if group in metrics_to_export_once :
486- skip .add (group )
464+ metrics .put_metric (key , value , get_emf_unit_for_fc_metrics (key ))
465+
466+ assert not {
467+ key : value
468+ for key , value in flattened_metrics .items ()
469+ if "err" in key or "fail" in key or "panic" in key or "num_faults" in key
470+ if value
471+ }
472+
487473 metrics .flush ()
488474
489475
@@ -514,6 +500,7 @@ def __init__(self, vm, timer=60):
514500 "guest_kernel" : vm .kernel_file .stem [2 :],
515501 }
516502 )
503+ self .start ()
517504
518505 def _flush_metrics (self ):
519506 """
0 commit comments