firecracker-microvm
diff --git a/‎Cargo.lock‎
Lines changed: 13 additions & 34 deletions b/‎Cargo.lock‎
Lines changed: 13 additions & 34 deletions
diff --git a/‎docs/getting-started.md‎
Lines changed: 3 additions & 3 deletions b/‎docs/getting-started.md‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎src/firecracker/Cargo.toml‎
Lines changed: 1 addition & 1 deletion b/‎src/firecracker/Cargo.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎src/jailer/Cargo.toml‎
Lines changed: 1 addition & 1 deletion b/‎src/jailer/Cargo.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎tests/framework/microvm.py‎
Lines changed: 7 additions & 0 deletions b/‎tests/framework/microvm.py‎
Lines changed: 7 additions & 0 deletions
diff --git a/‎tests/host_tools/fcmetrics.py‎
Lines changed: 30 additions & 43 deletions b/‎tests/host_tools/fcmetrics.py‎
Lines changed: 30 additions & 43 deletions
diff --git a/‎tests/integration_tests/performance/test_block_ab.py‎
Lines changed: 2 additions & 11 deletions b/‎tests/integration_tests/performance/test_block_ab.py‎
Lines changed: 2 additions & 11 deletions
diff --git a/‎tests/integration_tests/performance/test_memory_overhead.py‎
Lines changed: 1 addition & 6 deletions b/‎tests/integration_tests/performance/test_memory_overhead.py‎
Lines changed: 1 addition & 6 deletions
@@ -182,10 +182,10 @@ HOST_IFACE="eth0"
 sudo iptables -t nat -D POSTROUTING -o "$HOST_IFACE" -j MASQUERADE || true
 sudo iptables -D FORWARD -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT \
     || true
-sudo iptables -D FORWARD -i tap0 -o "$HOST_IFACE" -j ACCEPT || true
+sudo iptables -D FORWARD -i "$TAP_DEV" -o "$HOST_IFACE" -j ACCEPT || true
 sudo iptables -t nat -A POSTROUTING -o "$HOST_IFACE" -j MASQUERADE
 sudo iptables -I FORWARD 1 -m conntrack --ctstate RELATED,ESTABLISHED -j ACCEPT
-sudo iptables -I FORWARD 1 -i tap0 -o "$HOST_IFACE" -j ACCEPT
+sudo iptables -I FORWARD 1 -i "$TAP_DEV" -o "$HOST_IFACE" -j ACCEPT
 
 API_SOCKET="/tmp/firecracker.socket"
 LOGFILE="./firecracker.log"
@@ -203,7 +203,7 @@ sudo curl -X PUT --unix-socket "${API_SOCKET}" \
     }" \
     "http://localhost/logger"
 
-KERNEL="./vmlinux-5.10.210"
+KERNEL="./$(ls vmlinux* | tail -1)"
 KERNEL_BOOT_ARGS="console=ttyS0 reboot=k panic=1 pci=off"
 
 ARCH=$(uname -m)
 
@@ -34,7 +34,7 @@ vmm = { path = "../vmm" }
 [dev-dependencies]
 cargo_toml = "0.20.4"
 libc = "0.2.155"
-regex = { version = "1.10.5", default-features = false, features = ["std", "unicode-perl"] }
+regex = { version = "1.10.6", default-features = false, features = ["std", "unicode-perl"] }
 
 # Dev-Dependencies for uffd examples
 serde = { version = "1.0.204", features = ["derive"] }
 
@@ -15,7 +15,7 @@ bench = false
 libc = "0.2.155"
 log-instrument = { path = "../log-instrument", optional = true }
 nix = { version = "0.29.0", default-features = false, features = ["dir"] }
-regex = { version = "1.10.5", default-features = false, features = ["std"] }
+regex = { version = "1.10.6", default-features = false, features = ["std"] }
 thiserror = "1.0.63"
 
 utils = { path = "../utils" }
 
@@ -39,6 +39,7 @@
 from framework.microvm_helpers import MicrovmHelpers
 from framework.properties import global_props
 from framework.utils_drive import VhostUserBlkBackend, VhostUserBlkBackendType
+from host_tools.fcmetrics import FCMetricsMonitor
 from host_tools.memory import MemoryMonitor
 
 LOG = logging.getLogger("microvm")
@@ -558,6 +559,7 @@ def spawn(
         log_show_level=False,
         log_show_origin=False,
         metrics_path="fc.ndjson",
+        emit_metrics: bool = False,
     ):
         """Start a microVM as a daemon or in a screen session."""
         # pylint: disable=subprocess-run-check
@@ -583,6 +585,8 @@ def spawn(
             self.metrics_file.touch()
             self.create_jailed_resource(self.metrics_file)
             self.jailer.extra_args.update({"metrics-path": self.metrics_file.name})
+        else:
+            assert not emit_metrics
 
         if self.metadata_file:
             if os.path.exists(self.metadata_file):
@@ -619,6 +623,9 @@ def spawn(
 
         self._spawned = True
 
+        if emit_metrics:
+            self.monitors.append(FCMetricsMonitor(self))
+
         # Wait for the jailer to create resources needed, and Firecracker to
         # create its API socket.
         # We expect the jailer to start within 80 ms. However, we wait for
 
@@ -437,53 +437,39 @@ def get_emf_unit_for_fc_metrics(full_key):
 
 def flush_fc_metrics_to_cw(fc_metrics, metrics):
     """
-    Flush Firecracker metrics to CloudWatch
-    Use an existing metrics logger with existing dimensions so
-    that its easier to corelate the metrics with the test calling it.
-    Add a prefix "fc_metrics." to differentiate these metrics, this
-    also helps to avoid using this metrics in AB tests.
+    Flush Firecracker metrics to CloudWatch. Use an existing metrics logger with existing dimensions so that it is
+    easier to correlate the metrics with the test calling it. Add a prefix "fc_metrics." to differentiate these metrics,
+    this also helps to avoid using these metrics in A/B tests.
     NOTE:
-        There are metrics with keywords "fail", "err",
-        "num_faults", "panic" in their name and represent
-        some kind of failure in Firecracker.
-        This function `does not` assert on these failure metrics
-        since some tests might not want to assert on them while
-        some tests might want to assert on some but not others.
+        There are metrics with keywords "fail", "err", "num_faults", "panic" in their name that represent some kind of
+        failure in Firecracker. We assert that all of these are zero, to catch potentially silent failure modes. This
+        means the FCMetricsMonitor cannot be used in negative tests that might cause such metrics to be emitted.
     """
 
-    def walk_key(full_key, keys):
-        for key, value in keys.items():
-            final_full_key = full_key + "." + key
-            if isinstance(value, dict):
-                walk_key(final_full_key, value)
-            else:
-                # values are 0 when:
-                # - there is no update
-                # - device is not used
-                # - SharedIncMetric reset to 0 on flush so if
-                #   there is no change metric the values remain 0.
-                # We can save the amount of bytes we export to
-                # CloudWatch in these cases.
-                # however it is difficult to differentiate if a 0
-                # should be skipped or upload because it could be
-                # an expected value in some cases so we upload
-                # all the metrics even if data is 0.
-                unit = get_emf_unit_for_fc_metrics(final_full_key)
-                metrics.put_metric(f"fc_metrics.{final_full_key}", value, unit=unit)
-
-    # List of SharedStoreMetric that once updated have the same value thoughout the life of vm
-    metrics_to_export_once = {
-        "api_server",
-        "latencies_us",
-    }
-    skip = set()
-    for group, keys in fc_metrics.items():
-        if group == "utc_timestamp_ms":
+    # Pre-order tree traversal to convert a tree into its list of paths with dot-separated segments
+    def flatten_dict(node, prefix: str):
+        if not isinstance(node, dict):
+            return {prefix: node}
+
+        result = {}
+        for child_metric_name, child_metrics in node.items():
+            result.update(flatten_dict(child_metrics, f"{prefix}.{child_metric_name}"))
+        return result
+
+    flattened_metrics = flatten_dict(fc_metrics, "fc_metrics")
+
+    for key, value in flattened_metrics.items():
+        if ".utc_timestamp_ms." in key:
             continue
-        if group not in skip:
-            walk_key(group, keys)
-            if group in metrics_to_export_once:
-                skip.add(group)
+        metrics.put_metric(key, value, get_emf_unit_for_fc_metrics(key))
+
+    assert not {
+        key: value
+        for key, value in flattened_metrics.items()
+        if "err" in key or "fail" in key or "panic" in key or "num_faults" in key
+        if value
+    }
+
     metrics.flush()
 
 
@@ -514,6 +500,7 @@ def __init__(self, vm, timer=60):
                 "guest_kernel": vm.kernel_file.stem[2:],
             }
         )
+        self.start()
 
     def _flush_metrics(self):
         """
 
@@ -11,7 +11,6 @@
 
 import host_tools.drive as drive_tools
 from framework.utils import CmdBuilder, check_output, track_cpu_utilization
-from host_tools.fcmetrics import FCMetricsMonitor
 
 # size of the block device used in the test, in MB
 BLOCK_DEVICE_SIZE_MB = 2048
@@ -155,7 +154,7 @@ def test_block_performance(
     Execute block device emulation benchmarking scenarios.
     """
     vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
-    vm.spawn(log_level="Info")
+    vm.spawn(log_level="Info", emit_metrics=True)
     vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
     vm.add_net_iface()
     # Add a secondary block device for benchmark tests.
@@ -174,8 +173,6 @@ def test_block_performance(
             **vm.dimensions,
         }
     )
-    fcmetrics = FCMetricsMonitor(vm)
-    fcmetrics.start()
 
     vm.pin_threads(0)
 
@@ -187,8 +184,6 @@ def test_block_performance(
         for value in values:
             metrics.put_metric(f"cpu_utilization_{thread_name}", value, "Percent")
 
-    fcmetrics.stop()
-
 
 @pytest.mark.nonci
 @pytest.mark.parametrize("vcpus", [1, 2], ids=["1vcpu", "2vcpu"])
@@ -208,7 +203,7 @@ def test_block_vhost_user_performance(
     """
 
     vm = microvm_factory.build(guest_kernel, rootfs, monitor_memory=False)
-    vm.spawn(log_level="Info")
+    vm.spawn(log_level="Info", emit_metrics=True)
     vm.basic_config(vcpu_count=vcpus, mem_size_mib=GUEST_MEM_MIB)
     vm.add_net_iface()
 
@@ -226,8 +221,6 @@ def test_block_vhost_user_performance(
             **vm.dimensions,
         }
     )
-    fcmetrics = FCMetricsMonitor(vm)
-    fcmetrics.start()
 
     next_cpu = vm.pin_threads(0)
     vm.disks_vhost_user["scratch"].pin(next_cpu)
@@ -239,5 +232,3 @@ def test_block_vhost_user_performance(
     for thread_name, values in cpu_util.items():
         for value in values:
             metrics.put_metric(f"cpu_utilization_{thread_name}", value, "Percent")
-
-    fcmetrics.stop()
@@ -20,8 +20,6 @@
 import psutil
 import pytest
 
-from host_tools.fcmetrics import FCMetricsMonitor
-
 # If guest memory is >3328MB, it is split in a 2nd region
 X86_MEMORY_GAP_START = 3328 * 2**20
 
@@ -40,15 +38,13 @@ def test_memory_overhead(
 
     for _ in range(5):
         microvm = microvm_factory.build(guest_kernel, rootfs)
-        microvm.spawn()
+        microvm.spawn(emit_metrics=True)
         microvm.basic_config(vcpu_count=vcpu_count, mem_size_mib=mem_size_mib)
         microvm.add_net_iface()
         microvm.start()
         metrics.set_dimensions(
             {"performance_test": "test_memory_overhead", **microvm.dimensions}
         )
-        fcmetrics = FCMetricsMonitor(microvm)
-        fcmetrics.start()
         microvm.wait_for_up()
 
         guest_mem_bytes = mem_size_mib * 2**20
@@ -84,4 +80,3 @@ def test_memory_overhead(
         for metric in ["uss", "text"]:
             val = getattr(mem_info, metric)
             metrics.put_metric(metric, val, unit="Bytes")
-        fcmetrics.stop()