From 94fe020b9bf7b72b1855fe1395ed8cf9ced3b514 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 16 Apr 2025 13:49:43 -0400
Subject: [PATCH 01/66] add NetCostHandler::estimate_routing_chann_util()

---
 vpr/src/place/net_cost_handler.cpp | 42 ++++++++++++++++++++++++++++++
 vpr/src/place/net_cost_handler.h   |  2 ++
 2 files changed, 44 insertions(+)
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 65ec74dbb47..8181787a816 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1639,6 +1639,48 @@ double NetCostHandler::get_total_wirelength_estimate() const {
     return estimated_wirelength;
 }
 
+void NetCostHandler::estimate_routing_chann_util() const {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& place_move_ctx = placer_state_.move();
+    const auto& device_ctx = g_vpr_ctx.device();
+
+    auto chanx_occ = vtr::Matrix<double>({{
+                                          device_ctx.grid.width(),     //[0 .. device_ctx.grid.width() - 1] (length of x channel)
+                                          device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels)
+                                      }},
+                                      0);
+
+    auto chany_occ = vtr::Matrix<double>({{
+                                          device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels)
+                                          device_ctx.grid.height()     //[0 .. device_ctx.grid.height() - 1] (length of y channel)
+                                      }},
+                                      0);
+
+    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+            const t_bb& bb = place_move_ctx.bb_coords[net_id];
+            double expected_wirelength = get_net_wirelength_estimate(net_id, bb);
+            int n_y_channels = bb.xmax - bb.xmin + 1;
+            int n_x_channels = bb.ymax - bb.ymin + 1;
+
+
+            double expected_x_wl = (double)n_x_channels / (n_x_channels + n_y_channels) * expected_wirelength;
+            double expected_y_wl = expected_wirelength - expected_x_wl;
+
+            int total_channel_segments = n_y_channels * n_x_channels;
+            double expected_per_x_segment_wl = expected_x_wl / total_channel_segments;
+            double expected_per_y_segment_wl = expected_y_wl / total_channel_segments;
+
+            for (int x = bb.xmin; x <= bb.xmax; x++) {
+                for (int y = bb.ymin; y <= bb.ymax; y++) {
+                    chanx_occ[x][y] += expected_per_x_segment_wl;
+                    chany_occ[x][y] += expected_per_y_segment_wl;
+                }
+            }
+        }
+    }
+}
+
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
     auto& place_move_ctx = placer_state_.mutable_move();
     if (cube_bb_) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 510ffa60653..cdc38f30abd 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -126,6 +126,8 @@ class NetCostHandler {
      */
     double get_total_wirelength_estimate() const;
 
+    void estimate_routing_chann_util() const;
+
   private:
     ///@brief Specifies whether the bounding box is computed using cube method or per-layer method.
     bool cube_bb_;

From ee12d6d0d51f54707c1edb7e8d6fa06a668c6c35 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 16 Apr 2025 14:12:43 -0400
Subject: [PATCH 02/66] fix wl contribution in each direction

The estimated wirelength (WL) of a net should be divided between the x and y
directions in proportion to the distance traveled in each direction.
---
 vpr/src/place/net_cost_handler.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 8181787a816..ae717609cf9 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1660,14 +1660,14 @@ void NetCostHandler::estimate_routing_chann_util() const {
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
             const t_bb& bb = place_move_ctx.bb_coords[net_id];
             double expected_wirelength = get_net_wirelength_estimate(net_id, bb);
-            int n_y_channels = bb.xmax - bb.xmin + 1;
-            int n_x_channels = bb.ymax - bb.ymin + 1;
 
+            int distance_x = bb.xmax - bb.xmin + 1;
+            int distance_y = bb.ymax - bb.ymin + 1;
 
-            double expected_x_wl = (double)n_x_channels / (n_x_channels + n_y_channels) * expected_wirelength;
+            double expected_x_wl = (double)distance_x / (distance_x + distance_y) * expected_wirelength;
             double expected_y_wl = expected_wirelength - expected_x_wl;
 
-            int total_channel_segments = n_y_channels * n_x_channels;
+            int total_channel_segments = distance_x * distance_y;
             double expected_per_x_segment_wl = expected_x_wl / total_channel_segments;
             double expected_per_y_segment_wl = expected_y_wl / total_channel_segments;
 

From 15f76fb59799973e611db3679df0b687f4ac446e Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 16 Apr 2025 19:25:05 -0400
Subject: [PATCH 03/66] write post-placement estimate of routing chann util

---
 vpr/src/base/stats.cpp             | 19 +------------------
 vpr/src/base/stats.h               | 18 ++++++++++++++++++
 vpr/src/place/net_cost_handler.cpp | 28 ++++++++++++++++++++++++++++
 vpr/src/place/placer.cpp           |  2 ++
 4 files changed, 49 insertions(+), 18 deletions(-)

diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp
index 774235bf2cc..d76201143bf 100644
--- a/vpr/src/base/stats.cpp
+++ b/vpr/src/base/stats.cpp
@@ -31,23 +31,6 @@ static void load_channel_occupancies(const Netlist<>& net_list,
                                      vtr::Matrix<int>& chanx_occ,
                                      vtr::Matrix<int>& chany_occ);
 
-/**
- * @brief Writes channel occupancy data to a file.
- *
- * Each row contains:
- *   - (x, y) coordinate
- *   - Occupancy count
- *   - Occupancy percentage (occupancy / capacity)
- *   - Channel capacity
- *
- * @param filename      Output file path.
- * @param occupancy     Matrix of occupancy counts.
- * @param capacity_list List of channel capacities (per y for chanx, per x for chany).
- */
-static void write_channel_occupancy_table(const std::string_view filename,
-                                          const vtr::Matrix<int>& occupancy,
-                                          const std::vector<int>& capacity_list);
-
 /**
  * @brief Figures out maximum, minimum and average number of bends
  *        and net length in the routing.
@@ -249,7 +232,7 @@ static void get_channel_occupancy_stats(const Netlist<>& net_list, bool /***/) {
     VTR_LOG("\n");
 }
 
-static void write_channel_occupancy_table(const std::string_view filename,
+void write_channel_occupancy_table(const std::string_view filename,
                                           const vtr::Matrix<int>& occupancy,
                                           const std::vector<int>& capacity_list) {
     constexpr int w_coord = 6;
diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h
index 5f9e50e0700..4f7a3017c5f 100644
--- a/vpr/src/base/stats.h
+++ b/vpr/src/base/stats.h
@@ -2,6 +2,7 @@
 #include <vector>
 #include <limits>
 #include <algorithm>
+#include <string_view>
 #include "vpr_types.h"
 #include "netlist.h"
 
@@ -47,3 +48,20 @@ void print_resource_usage();
  * @param target_device_utilization The target device utilization set by the user
  */
 void print_device_utilization(const float target_device_utilization);
+
+/**
+ * @brief Writes channel occupancy data to a file.
+ *
+ * Each row contains:
+ *   - (x, y) coordinate
+ *   - Occupancy count
+ *   - Occupancy percentage (occupancy / capacity)
+ *   - Channel capacity
+ *
+ * @param filename      Output file path.
+ * @param occupancy     Matrix of occupancy counts.
+ * @param capacity_list List of channel capacities (per y for chanx, per x for chany).
+ */
+void write_channel_occupancy_table(const std::string_view filename,
+                                   const vtr::Matrix<int>& occupancy,
+                                   const std::vector<int>& capacity_list);
\ No newline at end of file
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index ae717609cf9..1cfabab601e 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -35,6 +35,7 @@
 #include "vtr_ndmatrix.h"
 #include "PlacerCriticalities.h"
 #include "vtr_prefix_sum.h"
+#include "stats.h"
 
 #include <array>
 
@@ -1679,6 +1680,33 @@ void NetCostHandler::estimate_routing_chann_util() const {
             }
         }
     }
+
+    auto chanx_occ_int = vtr::Matrix<int>({{
+                                              device_ctx.grid.width(),
+                                              device_ctx.grid.height() - 1
+                                          }},
+                                          0);
+
+    auto chany_occ_int = vtr::Matrix<int>({{
+                                              device_ctx.grid.width() - 1,
+                                              device_ctx.grid.height()
+                                          }},
+                                          0);
+
+    for (size_t x = 0; x < chanx_occ.dim_size(0); ++x) {
+        for (size_t y = 0; y < chanx_occ.dim_size(1); ++y) {
+            chanx_occ_int[x][y] = static_cast<int>(std::round(chanx_occ[x][y]));
+        }
+    }
+
+    for (size_t x = 0; x < chany_occ.dim_size(0); ++x) {
+        for (size_t y = 0; y < chany_occ.dim_size(1); ++y) {
+            chany_occ_int[x][y] = static_cast<int>(std::round(chany_occ[x][y]));
+        }
+    }
+
+    write_channel_occupancy_table("place_chanx_occupancy.txt", chanx_occ_int, device_ctx.chan_width.x_list);
+    write_channel_occupancy_table("place_chany_occupancy.txt", chany_occ_int, device_ctx.chan_width.y_list);
 }
 
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 52f68a442e2..b7c689d074c 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -384,6 +384,8 @@ void Placer::place() {
     check_place_();
 
     log_printer_.print_post_placement_stats();
+
+    net_cost_handler_.estimate_routing_chann_util();
 }
 
 void Placer::copy_locs_to_global_state(PlacementContext& place_ctx) {

From 45a5c0c84dff546489d3aa5fdc6ff8a070b8c56c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 18 Apr 2025 16:49:32 -0400
Subject: [PATCH 04/66] pass the vector by reference to PrefixSum1D constructor

---
 libs/libvtrutil/src/vtr_prefix_sum.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/libs/libvtrutil/src/vtr_prefix_sum.h b/libs/libvtrutil/src/vtr_prefix_sum.h
index 31635904f1b..ef716e353cc 100644
--- a/libs/libvtrutil/src/vtr_prefix_sum.h
+++ b/libs/libvtrutil/src/vtr_prefix_sum.h
@@ -93,7 +93,7 @@ class PrefixSum1D {
     /**
      * @brief Construct the 1D prefix sum from a vector.
      */
-    PrefixSum1D(std::vector<T> vals, T zero = T())
+    PrefixSum1D(const std::vector<T>& vals, T zero = T())
         : PrefixSum1D(
               vals.size(),
               [&](size_t x) noexcept {

From 6b563c0a203ce1b7f04a465d4de143d171721bc0 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 18 Apr 2025 17:36:06 -0400
Subject: [PATCH 05/66] add acc_chanx_util_ and acc_chany_util_ to
 NetCostHandler

---
 vpr/src/place/net_cost_handler.cpp | 11 +++++++----
 vpr/src/place/net_cost_handler.h   |  4 ++++
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index c775e674354..5bb1e3c81dd 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1527,8 +1527,8 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel*
                                                          t_pl_blocks_to_be_moved& blocks_affected,
                                                          double& bb_delta_c,
                                                          double& timing_delta_c) {
-    VTR_ASSERT_SAFE(bb_delta_c == 0.);
-    VTR_ASSERT_SAFE(timing_delta_c == 0.);
+    VTR_ASSERT_DEBUG(bb_delta_c == 0.);
+    VTR_ASSERT_DEBUG(timing_delta_c == 0.);
     auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
 
     ts_nets_to_update_.resize(0);
@@ -1641,7 +1641,7 @@ double NetCostHandler::get_total_wirelength_estimate() const {
     return estimated_wirelength;
 }
 
-void NetCostHandler::estimate_routing_chann_util() const {
+void NetCostHandler::estimate_routing_chann_util() {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& place_move_ctx = placer_state_.move();
     const auto& device_ctx = g_vpr_ctx.device();
@@ -1661,7 +1661,7 @@ void NetCostHandler::estimate_routing_chann_util() const {
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
             const t_bb& bb = place_move_ctx.bb_coords[net_id];
-            double expected_wirelength = get_net_wirelength_estimate(net_id, bb);
+            double expected_wirelength = get_net_wirelength_estimate_(net_id);
 
             int distance_x = bb.xmax - bb.xmin + 1;
             int distance_y = bb.ymax - bb.ymin + 1;
@@ -1682,6 +1682,9 @@ void NetCostHandler::estimate_routing_chann_util() const {
         }
     }
 
+    acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_occ);
+    acc_chany_util_ = vtr::PrefixSum2D<double>(chanx_occ);
+
     auto chanx_occ_int = vtr::Matrix<int>({{
                                               device_ctx.grid.width(),
                                               device_ctx.grid.height() - 1
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 1e557ae56a5..57bfc7ce8cb 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -205,6 +205,10 @@ class NetCostHandler {
     vtr::PrefixSum1D<int> acc_chanx_width_; // [0..device_ctx.grid.width()-1]
     vtr::PrefixSum1D<int> acc_chany_width_; // [0..device_ctx.grid.height()-1]
 
+    vtr::PrefixSum2D<double> acc_chanx_util_;
+    vtr::PrefixSum2D<double> acc_chany_util_;
+
+
     /**
      * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in 
      * the cross-die-layer direction over a 2D (x,y) region. We don't assume the inter-die connectivity is the same at all (x,y) locations, so we

From 9157961a6f53cb413bc7c60d8c24b61d6baa2dbf Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Apr 2025 19:22:23 -0400
Subject: [PATCH 06/66] compute chan utilization ratio instead of occupancy

---
 vpr/src/place/net_cost_handler.cpp | 42 +++++++++++++++++++-----------
 vpr/src/place/net_cost_handler.h   | 21 ++++++++++-----
 2 files changed, 42 insertions(+), 21 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 85cd3ae80ca..7378623a9b9 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -109,6 +109,9 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     if (cube_bb_) {
         ts_bb_edge_new_.resize(num_nets, t_bb());
         ts_bb_coord_new_.resize(num_nets, t_bb());
+
+        ts_net_avg_chann_util_new_.resize(num_nets);
+
         bb_coords_.resize(num_nets, t_bb());
         bb_num_on_edges_.resize(num_nets, t_bb());
         comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cost_, this, std::placeholders::_1);
@@ -533,6 +536,12 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts
 
         num_sink_pin_layer[pin_loc.layer_num]++;
     }
+
+    // the average channel utilization that is going to be updated by this function
+    auto& [x_chan_util, y_chan_util] = use_ts ? ts_net_avg_chann_util_new_[net_id] : net_avg_chann_util_[net_id];
+    const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
+    x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+    y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
 }
 
 void NetCostHandler::get_non_updatable_per_layer_bb_(ClusterNetId net_id, bool use_ts) {
@@ -1636,16 +1645,15 @@ double NetCostHandler::get_total_wirelength_estimate() const {
 
 void NetCostHandler::estimate_routing_chann_util() {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
-    const auto& place_move_ctx = placer_state_.move();
     const auto& device_ctx = g_vpr_ctx.device();
 
-    auto chanx_occ = vtr::Matrix<double>({{
+    auto chanx_util = vtr::Matrix<double>({{
                                           device_ctx.grid.width(),     //[0 .. device_ctx.grid.width() - 1] (length of x channel)
                                           device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels)
                                       }},
                                       0);
 
-    auto chany_occ = vtr::Matrix<double>({{
+    auto chany_util = vtr::Matrix<double>({{
                                           device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels)
                                           device_ctx.grid.height()     //[0 .. device_ctx.grid.height() - 1] (length of y channel)
                                       }},
@@ -1653,7 +1661,7 @@ void NetCostHandler::estimate_routing_chann_util() {
 
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
-            const t_bb& bb = place_move_ctx.bb_coords[net_id];
+            const t_bb& bb = bb_coords_[net_id];
             double expected_wirelength = get_net_wirelength_estimate_(net_id);
 
             int distance_x = bb.xmax - bb.xmin + 1;
@@ -1668,16 +1676,13 @@ void NetCostHandler::estimate_routing_chann_util() {
 
             for (int x = bb.xmin; x <= bb.xmax; x++) {
                 for (int y = bb.ymin; y <= bb.ymax; y++) {
-                    chanx_occ[x][y] += expected_per_x_segment_wl;
-                    chany_occ[x][y] += expected_per_y_segment_wl;
+                    chanx_util[x][y] += expected_per_x_segment_wl;
+                    chany_util[x][y] += expected_per_y_segment_wl;
                 }
             }
         }
     }
 
-    acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_occ);
-    acc_chany_util_ = vtr::PrefixSum2D<double>(chanx_occ);
-
     auto chanx_occ_int = vtr::Matrix<int>({{
                                               device_ctx.grid.width(),
                                               device_ctx.grid.height() - 1
@@ -1690,20 +1695,27 @@ void NetCostHandler::estimate_routing_chann_util() {
                                           }},
                                           0);
 
-    for (size_t x = 0; x < chanx_occ.dim_size(0); ++x) {
-        for (size_t y = 0; y < chanx_occ.dim_size(1); ++y) {
-            chanx_occ_int[x][y] = static_cast<int>(std::round(chanx_occ[x][y]));
+    const t_chan_width& chan_width = device_ctx.chan_width;
+
+    for (size_t x = 0; x < chanx_util.dim_size(0); ++x) {
+        for (size_t y = 0; y < chanx_util.dim_size(1); ++y) {
+            chanx_occ_int[x][y] = static_cast<int>(std::round(chanx_util[x][y]));
+            chanx_util[x][y] /= chan_width.x_list[y];
         }
     }
 
-    for (size_t x = 0; x < chany_occ.dim_size(0); ++x) {
-        for (size_t y = 0; y < chany_occ.dim_size(1); ++y) {
-            chany_occ_int[x][y] = static_cast<int>(std::round(chany_occ[x][y]));
+    for (size_t x = 0; x < chany_util.dim_size(0); ++x) {
+        for (size_t y = 0; y < chany_util.dim_size(1); ++y) {
+            chany_occ_int[x][y] = static_cast<int>(std::round(chany_util[x][y]));
+            chany_util[x][y] /= chan_width.y_list[x];
         }
     }
 
     write_channel_occupancy_table("place_chanx_occupancy.txt", chanx_occ_int, device_ctx.chan_width.x_list);
     write_channel_occupancy_table("place_chany_occupancy.txt", chany_occ_int, device_ctx.chan_width.y_list);
+
+    acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_util);
+    acc_chany_util_ = vtr::PrefixSum2D<double>(chany_util);
 }
 
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index fc6893e0158..524d6911091 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -126,7 +126,7 @@ class NetCostHandler {
      */
     double get_total_wirelength_estimate() const;
 
-    void estimate_routing_chann_util() const;
+    void estimate_routing_chann_util();
 
   private:
     ///@brief Specifies whether the bounding box is computed using cube method or per-layer method.
@@ -171,19 +171,28 @@ class NetCostHandler {
     /* [0...num_affected_nets] -> net_id of the affected nets */
     std::vector<ClusterNetId> ts_nets_to_update_;
 
-    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
+    vtr::vector<ClusterNetId, std::pair<float, float>> ts_net_avg_chann_util_new_;
+
+    /// Store the number of blocks on each of a net's bounding box (to allow efficient updates)
+    /// [0..cluster_ctx.clb_nlist.nets().size()-1]
     vtr::vector<ClusterNetId, t_bb> bb_num_on_edges_;
 
-    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
+    /// Store the bounding box coordinates of a net's bounding box
+    /// [0..cluster_ctx.clb_nlist.nets().size()-1]
     vtr::vector<ClusterNetId, t_bb> bb_coords_;
 
-    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates)
+    vtr::vector<ClusterNetId, std::pair<float, float>> net_avg_chann_util_;
+
+    /// Store the number of blocks on each of a net's bounding box (to allow efficient updates)
+    /// [0..cluster_ctx.clb_nlist.nets().size()-1]
     vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_num_on_edges_;
 
-    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box
+    /// Store the bounding box coordinates of a net's bounding box
+    /// [0..cluster_ctx.clb_nlist.nets().size()-1]
     vtr::vector<ClusterNetId, std::vector<t_2D_bb>> layer_bb_coords_;
 
-    // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer ()
+    /// Store the number of blocks on each layer ()
+    /// [0..cluster_ctx.clb_nlist.nets().size()-1]
     vtr::Matrix<int> num_sink_pin_layer_;
 
     /**

From 3bee75c10b1e01ba1624f065582c01575bc5def6 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Apr 2025 19:22:58 -0400
Subject: [PATCH 07/66] add NetCostHandler::get_net_cube_cong_cost_()

---
 vpr/src/place/net_cost_handler.cpp | 19 ++++++++++++++++++-
 vpr/src/place/net_cost_handler.h   |  2 ++
 2 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 7378623a9b9..dbe8434c739 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -109,7 +109,6 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     if (cube_bb_) {
         ts_bb_edge_new_.resize(num_nets, t_bb());
         ts_bb_coord_new_.resize(num_nets, t_bb());
-
         ts_net_avg_chann_util_new_.resize(num_nets);
 
         bb_coords_.resize(num_nets, t_bb());
@@ -849,6 +848,12 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
     if (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) {
         bb_update_status_[net_id] = NetUpdateState::UPDATED_ONCE;
     }
+
+    // the average channel utilization that is going to be updated by this function
+    auto& [x_chan_util, y_chan_util] = ts_net_avg_chann_util_new_[net_id];
+    const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
+    x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+    y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
 }
 
 void NetCostHandler::update_layer_bb_(ClusterNetId net_id,
@@ -1376,6 +1381,17 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
     return ncost;
 }
 
+double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) {
+    auto [x_chan_cong, y_chan_cong] = use_ts ? ts_net_avg_chann_util_new_[net_id] : net_avg_chann_util_[net_id];
+
+    constexpr float threshold = 0.5f;
+
+    x_chan_cong = (x_chan_cong < threshold) ? 0.0f : x_chan_cong - threshold;
+    y_chan_cong = (y_chan_cong < threshold) ? 0.0f : y_chan_cong - threshold;
+
+    return x_chan_cong + y_chan_cong;
+}
+
 double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) {
     // Per-layer bounding box of the net
     const std::vector<t_2D_bb>& bb = use_ts ? layer_ts_bb_coord_new_[net_id] : layer_bb_coords_[net_id];
@@ -1721,6 +1737,7 @@ void NetCostHandler::estimate_routing_chann_util() {
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
     if (cube_bb_) {
         bb_coords_[net_id] = ts_bb_coord_new_[net_id];
+        net_avg_chann_util_[net_id] = ts_net_avg_chann_util_new_[net_id];
     } else {
         layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id];
     }
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 524d6911091..2a2e4e804d3 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -528,6 +528,8 @@ class NetCostHandler {
      */
     double get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts);
 
+    double get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts);
+
     /**
      * @brief Given the per-layer BB, calculate the wire-length cost of the net on each layer
      * and return the sum of the costs

From d0908b9533f5a3fea4f012eb560313b443065d5e Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sat, 19 Apr 2025 19:28:09 -0400
Subject: [PATCH 08/66] rename ts_net_avg_chann_util_new_ and
 net_avg_chann_util_

---
 vpr/src/place/net_cost_handler.cpp | 10 +++++-----
 vpr/src/place/net_cost_handler.h   |  4 ++--
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index dbe8434c739..80cc1acb1d7 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -109,7 +109,7 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     if (cube_bb_) {
         ts_bb_edge_new_.resize(num_nets, t_bb());
         ts_bb_coord_new_.resize(num_nets, t_bb());
-        ts_net_avg_chann_util_new_.resize(num_nets);
+        ts_avg_chann_util_new_.resize(num_nets);
 
         bb_coords_.resize(num_nets, t_bb());
         bb_num_on_edges_.resize(num_nets, t_bb());
@@ -537,7 +537,7 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts
     }
 
     // the average channel utilization that is going to be updated by this function
-    auto& [x_chan_util, y_chan_util] = use_ts ? ts_net_avg_chann_util_new_[net_id] : net_avg_chann_util_[net_id];
+    auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
     const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
     x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
     y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
@@ -850,7 +850,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
     }
 
     // the average channel utilization that is going to be updated by this function
-    auto& [x_chan_util, y_chan_util] = ts_net_avg_chann_util_new_[net_id];
+    auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id];
     const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
     x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
     y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
@@ -1382,7 +1382,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
 }
 
 double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) {
-    auto [x_chan_cong, y_chan_cong] = use_ts ? ts_net_avg_chann_util_new_[net_id] : net_avg_chann_util_[net_id];
+    auto [x_chan_cong, y_chan_cong] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
 
     constexpr float threshold = 0.5f;
 
@@ -1737,7 +1737,7 @@ void NetCostHandler::estimate_routing_chann_util() {
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
     if (cube_bb_) {
         bb_coords_[net_id] = ts_bb_coord_new_[net_id];
-        net_avg_chann_util_[net_id] = ts_net_avg_chann_util_new_[net_id];
+        avg_chann_util_[net_id] = ts_avg_chann_util_new_[net_id];
     } else {
         layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id];
     }
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 2a2e4e804d3..bf955400684 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -171,7 +171,7 @@ class NetCostHandler {
     /* [0...num_affected_nets] -> net_id of the affected nets */
     std::vector<ClusterNetId> ts_nets_to_update_;
 
-    vtr::vector<ClusterNetId, std::pair<float, float>> ts_net_avg_chann_util_new_;
+    vtr::vector<ClusterNetId, std::pair<float, float>> ts_avg_chann_util_new_;
 
     /// Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     /// [0..cluster_ctx.clb_nlist.nets().size()-1]
@@ -181,7 +181,7 @@ class NetCostHandler {
     /// [0..cluster_ctx.clb_nlist.nets().size()-1]
     vtr::vector<ClusterNetId, t_bb> bb_coords_;
 
-    vtr::vector<ClusterNetId, std::pair<float, float>> net_avg_chann_util_;
+    vtr::vector<ClusterNetId, std::pair<float, float>> avg_chann_util_;
 
     /// Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     /// [0..cluster_ctx.clb_nlist.nets().size()-1]

From 3a2afd3debd6ce0db8e94762c3cc69a6cff013d6 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 20 Apr 2025 17:18:37 -0400
Subject: [PATCH 09/66] update per net average chan util in
 get_bb_from_scratch_()

---
 vpr/src/place/net_cost_handler.cpp | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index ffa0977af2c..551370770d6 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -112,6 +112,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
         ts_avg_chann_util_new_.resize(num_nets);
 
         bb_coords_.resize(num_nets, t_bb());
+        avg_chann_util_.resize(num_nets);
+
         bb_num_on_edges_.resize(num_nets, t_bb());
         comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cost_, this, std::placeholders::_1);
         update_bb_functor_ = std::bind(&NetCostHandler::update_bb_, this, std::placeholders::_1, std::placeholders::_2,
@@ -584,8 +586,6 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
                                 t_physical_tile_loc pin_new_loc,
                                 bool src_pin) {
     //TODO: account for multiple physical pin instances per logical pin
-    const t_bb *curr_bb_edge, *curr_bb_coord;
-
     const auto& device_ctx = g_vpr_ctx.device();
 
     const int num_layers = device_ctx.grid.get_num_layers();
@@ -605,6 +605,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
 
     vtr::NdMatrixProxy<int, 1> curr_num_sink_pin_layer = (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) ? num_sink_pin_layer_[size_t(net_id)] : num_sink_pin_layer_new;
 
+    const t_bb *curr_bb_edge, *curr_bb_coord;
     if (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) {
         /* The net had NOT been updated before, could use the old values */
         curr_bb_edge = &bb_num_on_edges_[net_id];
@@ -1285,6 +1286,12 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) {
     num_on_edges.ymax = ymax_edge;
     num_on_edges.layer_min = layer_min_edge;
     num_on_edges.layer_max = layer_max_edge;
+
+    // the average channel utilization that is going to be updated by this function
+    auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
+    const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1);
+    x_chan_util = acc_chanx_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
+    y_chan_util = acc_chany_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
 }
 
 void NetCostHandler::get_layer_bb_from_scratch_(ClusterNetId net_id,

From 758eb86fc52ccc81cf01d04d4aeea569129092f8 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Sun, 20 Apr 2025 17:42:39 -0400
Subject: [PATCH 10/66] add net_cong_cost_ and proposed_net_cong_cost_

---
 vpr/src/place/net_cost_handler.cpp | 19 +++++++++++++------
 vpr/src/place/net_cost_handler.h   |  9 +++++++--
 2 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 551370770d6..fe569b6f892 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -141,6 +141,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     // negative net costs mean the cost is not valid.
     net_cost_.resize(num_nets, -1.);
     proposed_net_cost_.resize(num_nets, -1.);
+    net_cong_cost_.resize(num_nets, -1.);
+    proposed_net_cong_cost_.resize(num_nets, -1.);
 
     /* Used to store costs for moves not yet made and to indicate when a net's
      * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't
@@ -433,7 +435,6 @@ void NetCostHandler::update_td_delta_costs_(const PlaceDelayModel* delay_model,
     }
 }
 
-///@brief Record effected nets.
 void NetCostHandler::record_affected_net_(const ClusterNetId net) {
     /* Record effected nets. */
     if (proposed_net_cost_[net] < 0.) {
@@ -1536,13 +1537,15 @@ static double wirelength_crossing_count(size_t fanout) {
     }
 }
 
-void NetCostHandler::set_bb_delta_cost_(double& bb_delta_c) {
+void NetCostHandler::set_bb_delta_cost_(double& bb_delta_c, double& congestion_delta_c) {
     for (const ClusterNetId ts_net : ts_nets_to_update_) {
         ClusterNetId net_id = ts_net;
 
         proposed_net_cost_[net_id] = get_net_bb_cost_functor_(net_id);
+        proposed_net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/true);
 
         bb_delta_c += proposed_net_cost_[net_id] - net_cost_[net_id];
+        congestion_delta_c += proposed_net_cong_cost_[net_id] - net_cong_cost_[net_id];
     }
 }
 
@@ -1550,10 +1553,11 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel*
                                                          const PlacerCriticalities* criticalities,
                                                          t_pl_blocks_to_be_moved& blocks_affected,
                                                          double& bb_delta_c,
-                                                         double& timing_delta_c) {
+                                                         double& timing_delta_c,
+                                                         double& congestion_delta_c) {
     VTR_ASSERT_DEBUG(bb_delta_c == 0.);
     VTR_ASSERT_DEBUG(timing_delta_c == 0.);
-    auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
+    const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
 
     ts_nets_to_update_.resize(0);
 
@@ -1581,12 +1585,12 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel*
 
     /* Now update the bounding box costs (since the net bounding     *
      * boxes are up-to-date). The cost is only updated once per net. */
-    set_bb_delta_cost_(bb_delta_c);
+    set_bb_delta_cost_(bb_delta_c, congestion_delta_c);
 }
 
 void NetCostHandler::update_move_nets() {
     /* update net cost functions and reset flags. */
-    auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
 
     for (const ClusterNetId ts_net : ts_nets_to_update_) {
         ClusterNetId net_id = ts_net;
@@ -1602,9 +1606,11 @@ void NetCostHandler::update_move_nets() {
         }
 
         net_cost_[net_id] = proposed_net_cost_[net_id];
+        net_cong_cost_[net_id] = proposed_net_cong_cost_[net_id];
 
         /* negative proposed_net_cost value is acting as a flag to mean not computed yet. */
         proposed_net_cost_[net_id] = -1;
+        proposed_net_cong_cost_[net_id] = -1;
         bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET;
     }
 }
@@ -1614,6 +1620,7 @@ void NetCostHandler::reset_move_nets() {
     for (const ClusterNetId ts_net : ts_nets_to_update_) {
         ClusterNetId net_id = ts_net;
         proposed_net_cost_[net_id] = -1;
+        proposed_net_cong_cost_[net_id] = -1;
         bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET;
     }
 }
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 2546a5fb1eb..733267bba9c 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -91,7 +91,8 @@ class NetCostHandler {
                                              const PlacerCriticalities* criticalities,
                                              t_pl_blocks_to_be_moved& blocks_affected,
                                              double& bb_delta_c,
-                                             double& timing_delta_c);
+                                             double& timing_delta_c,
+                                             double& congestion_delta_c);
 
     /**
      * @brief Reset the net cost function flags (proposed_net_cost and bb_updated_before)
@@ -215,6 +216,10 @@ class NetCostHandler {
      */
     vtr::vector<ClusterNetId, double> net_cost_;
     vtr::vector<ClusterNetId, double> proposed_net_cost_;
+
+    vtr::vector<ClusterNetId, double> net_cong_cost_;
+    vtr::vector<ClusterNetId, double> proposed_net_cong_cost_;
+
     vtr::vector<ClusterNetId, NetUpdateState> bb_update_status_;
 
     /**
@@ -278,7 +283,7 @@ class NetCostHandler {
      * indicated in the blocks_affected data structure.
      * @param bb_delta_c Cost difference after and before moving the block
      */
-    void set_bb_delta_cost_(double& bb_delta_c);
+    void set_bb_delta_cost_(double& bb_delta_c, double& congestion_delta_c);
 
     /**
      * @brief Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac arrays with the inverse of

From a79878aa72a192da8f8b33348d05de8a8dff2d08 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 21 Apr 2025 16:56:00 -0400
Subject: [PATCH 11/66] take congestion cost into account

---
 vpr/src/base/SetupVPR.cpp     |  1 +
 vpr/src/base/read_options.cpp | 10 +++++++---
 vpr/src/base/read_options.h   |  1 +
 vpr/src/base/vpr_types.h      |  1 +
 vpr/src/place/annealer.cpp    | 13 ++++++++-----
 vpr/src/place/place_util.h    |  2 ++
 6 files changed, 20 insertions(+), 8 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 78b8f797ff5..7951eda6f7f 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -643,6 +643,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts)
     PlacerOpts->recompute_crit_iter = Options.RecomputeCritIter;
 
     PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff;
+    PlacerOpts->congestion_factor = Options.place_congestion_factor;
 
     /* Depends on PlacerOpts->place_algorithm */
     PlacerOpts->delay_offset = Options.place_delay_offset;
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 26f9b5bb132..6b335bae0ca 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2412,12 +2412,16 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
     auto& place_timing_grp = parser.add_argument_group("timing-driven placement options");
 
     place_timing_grp.add_argument(args.PlaceTimingTradeoff, "--timing_tradeoff")
-        .help(
-            "Trade-off control between delay and wirelength during placement."
-            " 0.0 focuses completely on wirelength, 1.0 completely on timing")
+        .help("Trade-off control between delay and wirelength during placement. "
+              "0.0 focuses completely on wirelength, 1.0 completely on timing")
         .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor")
+        .help("To be written")
+        .default_value("0.0")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter")
         .help("Controls how many temperature updates occur between timing analysis during placement")
         .default_value("1")
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index dd1be4b2575..889ba3f256a 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -183,6 +183,7 @@ struct t_options {
 
     /* Timing-driven placement options only */
     argparse::ArgValue<float> PlaceTimingTradeoff;
+    argparse::ArgValue<float> place_congestion_factor;
     argparse::ArgValue<int> RecomputeCritIter;
     argparse::ArgValue<int> inner_loop_recompute_divider;
     argparse::ArgValue<int> quench_recompute_divider;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 3bad2e48d3a..652ee8f8a2e 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -961,6 +961,7 @@ struct t_placer_opts {
     t_place_algorithm place_quench_algorithm;
     t_annealing_sched anneal_sched; ///<Placement option annealing schedule
     float timing_tradeoff;
+    float congestion_factor;
     int place_chan_width;
     enum e_pad_loc_type pad_loc_type;
     std::string constraints_file;
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 949dfe5b4c1..fe114e19d12 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -369,9 +369,10 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
 
     /* I'm using negative values of proposed_net_cost as a flag,
      * so DO NOT use cost functions that can go negative. */
-    double delta_c = 0;        //Change in cost due to this swap.
-    double bb_delta_c = 0;     //Change in the bounding box (wiring) cost.
-    double timing_delta_c = 0; //Change in the timing cost (delay * criticality).
+    double delta_c = 0.;        //Change in cost due to this swap.
+    double bb_delta_c = 0.;     //Change in the bounding box (wiring) cost.
+    double timing_delta_c = 0.; //Change in the timing cost (delay * criticality).
+    double congestion_delta_c = 0.;
 
     /* Allow some fraction of moves to not be restricted by rlim,
      * in the hopes of better escaping local minima. */
@@ -449,7 +450,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
          * delays and timing costs and store them in proposed_* data structures.
          */
         net_cost_handler_.find_affected_nets_and_update_costs(delay_model_, criticalities_, blocks_affected_,
-                                                              bb_delta_c, timing_delta_c);
+                                                              bb_delta_c, timing_delta_c, congestion_delta_c);
 
         if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
             /* Take delta_c as a combination of timing and wiring cost. In
@@ -466,7 +467,8 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
                            timing_delta_c,
                            costs_.timing_cost_norm);
             delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
-                      + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm;
+                      + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm
+                      + placer_opts_.congestion_factor * congestion_delta_c * costs_.congestion_cost_norm;
         } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
             /* For setup slack analysis, we first do a timing analysis to get the newest
              * slack values resulted from the proposed block moves. If the move turns out
@@ -533,6 +535,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
         if (move_outcome == e_move_result::ACCEPTED) {
             costs_.cost += delta_c;
             costs_.bb_cost += bb_delta_c;
+            costs_.congestion_cost += congestion_delta_c;
 
             if (place_algorithm == e_place_algorithm::CRITICALITY_TIMING_PLACE) {
                 costs_.timing_cost += timing_delta_c;
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 14cf44455c6..3ae2f9f81f4 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -96,8 +96,10 @@ class t_placer_costs {
     double cost = 0.;
     double bb_cost = 0.;
     double timing_cost = 0.;
+    double congestion_cost = 0.;
     double bb_cost_norm = 0.;
     double timing_cost_norm = 0.;
+    double congestion_cost_norm = 0.;
 
     NocCostTerms noc_cost_terms;
     NocCostTerms noc_cost_norm_factors;

From fff89548b7e56414fe07ae2f9ef4f3dca8e9aa74 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 21 Apr 2025 18:29:17 -0400
Subject: [PATCH 12/66] recompute congestion cost in NetCostHandler

---
 vpr/src/place/net_cost_handler.cpp | 26 +++++++++++++++-----------
 vpr/src/place/net_cost_handler.h   |  2 +-
 2 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index fe569b6f892..156f78d3fe7 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1511,19 +1511,21 @@ float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) {
     return z_cost_factor;
 }
 
-double NetCostHandler::recompute_bb_cost_() {
-    double cost = 0;
+std::pair<double, double> NetCostHandler::recompute_bb_cong_cost_() {
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
 
-    auto& cluster_ctx = g_vpr_ctx.clustering();
+    double bb_cost = 0.;
+    double cong_cost = 0.;
 
-    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */
-        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {   /* Do only if not ignored. */
-            /* Bounding boxes don't have to be recomputed; they're correct. */
-            cost += net_cost_[net_id];
+    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+            // Bounding boxes don't have to be recomputed; they're correct.
+            bb_cost += net_cost_[net_id];
+            cong_cost += net_cong_cost_[net_id];
         }
     }
 
-    return cost;
+    return {bb_cost, cong_cost};
 }
 
 static double wirelength_crossing_count(size_t fanout) {
@@ -1639,9 +1641,11 @@ void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_m
         }
     };
 
-    double new_bb_cost = recompute_bb_cost_();
+    auto[new_bb_cost, new_cong_cost] = recompute_bb_cong_cost_();
     check_and_print_cost(new_bb_cost, costs.bb_cost, "bb_cost");
+    check_and_print_cost(new_cong_cost, costs.congestion_cost, "cong_cost");
     costs.bb_cost = new_bb_cost;
+    costs.congestion_cost = new_cong_cost;
 
     if (placer_opts_.place_algorithm.is_timing_driven()) {
         double new_timing_cost = 0.;
@@ -1658,8 +1662,8 @@ double NetCostHandler::get_total_wirelength_estimate() const {
     const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
 
     double estimated_wirelength = 0.0;
-    for (ClusterNetId net_id : clb_nlist.nets()) { /* for each net ... */
-        if (!clb_nlist.net_is_ignored(net_id)) {   /* Do only if not ignored. */
+    for (ClusterNetId net_id : clb_nlist.nets()) {
+        if (!clb_nlist.net_is_ignored(net_id)) {
             if (cube_bb_) {
                 estimated_wirelength += get_net_wirelength_estimate_(net_id);
             } else {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 733267bba9c..900f75657ea 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -517,7 +517,7 @@ class NetCostHandler {
      * This functions is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again.
      * @return Total bb (wirelength) cost for the placement
      */
-    double recompute_bb_cost_();
+    std::pair<double, double> recompute_bb_cong_cost_();
 
     /**
      * @brief Given the 3D BB, calculate the wire-length cost of the net

From a326c981d4810a740f26cada65518ebf87fee34c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 21 Apr 2025 18:30:13 -0400
Subject: [PATCH 13/66] update congestion cost norm and consider its
 contribution when computing total cost

---
 vpr/src/place/place_util.cpp | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 1ac0899fbdf..6056e3d15b4 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -11,13 +11,18 @@
 #include "noc_place_utils.h"
 
 void t_placer_costs::update_norm_factors() {
+    const auto& clustered_nlist = g_vpr_ctx.clustering().clb_nlist;
+
+    bb_cost_norm = 1 / bb_cost;
+    if (congestion_cost > 0.) {
+        congestion_cost_norm = 1 / congestion_cost;
+    } else {
+        congestion_cost_norm = 1. / (double)clustered_nlist.nets().size();
+    }
+
     if (place_algorithm.is_timing_driven()) {
-        bb_cost_norm = 1 / bb_cost;
         //Prevent the norm factor from going to infinity
         timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
-    } else {
-        VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
-        bb_cost_norm = 1 / bb_cost; //Updating the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost
     }
 
     if (noc_enabled) {
@@ -36,6 +41,8 @@ double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_
         total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm);
     }
 
+    total_cost += congestion_cost * congestion_cost_norm;
+
     if (noc_opts.noc) {
         // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion
         total_cost += calculate_noc_cost(noc_cost_terms, noc_cost_norm_factors, noc_opts);

From 0a2634e2da436ae7878e92afa2d8b5175554ff09 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 21 Apr 2025 18:34:26 -0400
Subject: [PATCH 14/66] compute average congestion cost in t_placer_statistics

---
 vpr/src/place/place_util.cpp | 4 ++++
 vpr/src/place/place_util.h   | 1 +
 2 files changed, 5 insertions(+)

diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 6056e3d15b4..2caead2fcbd 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -83,6 +83,7 @@ void t_placer_statistics::reset() {
     av_cost = 0.;
     av_bb_cost = 0.;
     av_timing_cost = 0.;
+    av_cong_cost = 0.;
     sum_of_squares = 0.;
     success_sum = 0;
     success_rate = 0.;
@@ -95,6 +96,7 @@ void t_placer_statistics::single_swap_update(const t_placer_costs& costs) {
     av_cost += costs.cost;
     av_bb_cost += costs.bb_cost;
     av_timing_cost += costs.timing_cost;
+    av_cong_cost += costs.congestion_cost;
     sum_of_squares += (costs.cost) * (costs.cost);
 }
 
@@ -104,10 +106,12 @@ void t_placer_statistics::calc_iteration_stats(const t_placer_costs& costs, int
         av_cost = costs.cost;
         av_bb_cost = costs.bb_cost;
         av_timing_cost = costs.timing_cost;
+        av_cong_cost = costs.congestion_cost;
     } else {
         av_cost /= success_sum;
         av_bb_cost /= success_sum;
         av_timing_cost /= success_sum;
+        av_cong_cost /= success_sum;
     }
     success_rate = success_sum / float(move_lim);
     std_dev = get_std_dev(success_sum, sum_of_squares, av_cost);
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index 3ae2f9f81f4..b8ef75f8dc7 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -179,6 +179,7 @@ class t_placer_statistics {
     double av_cost;
     double av_bb_cost;
     double av_timing_cost;
+    double av_cong_cost;
     double sum_of_squares;
     int success_sum;
     float success_rate;

From f3ec24a76a28842da8b3e5a01a18d40e265e7d29 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 22 Apr 2025 14:08:17 -0400
Subject: [PATCH 15/66] compute congestion cost from scratch in
 comp_bb_cong_cost() and assign it to t_placer_costs::congestion_cost

---
 vpr/src/place/net_cost_handler.cpp | 51 ++++++++++++++++++++----------
 vpr/src/place/net_cost_handler.h   |  8 ++---
 vpr/src/place/place_util.cpp       |  4 +++
 vpr/src/place/placer.cpp           | 17 ++++------
 4 files changed, 49 insertions(+), 31 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 156f78d3fe7..04104c0f5d6 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -115,7 +115,7 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
         avg_chann_util_.resize(num_nets);
 
         bb_num_on_edges_.resize(num_nets, t_bb());
-        comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cost_, this, std::placeholders::_1);
+        comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1);
         update_bb_functor_ = std::bind(&NetCostHandler::update_bb_, this, std::placeholders::_1, std::placeholders::_2,
                                        std::placeholders::_3, std::placeholders::_4);
         get_net_bb_cost_functor_ = std::bind(&NetCostHandler::get_net_cube_bb_cost_, this, std::placeholders::_1, /*use_ts=*/true);
@@ -125,7 +125,7 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
         layer_ts_bb_coord_new_.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         layer_bb_num_on_edges_.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
         layer_bb_coords_.resize(num_nets, std::vector<t_2D_bb>(num_layers, t_2D_bb()));
-        comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_per_layer_bb_cost_, this, std::placeholders::_1);
+        comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_per_layer_bb_cost_, this, std::placeholders::_1);
         update_bb_functor_ = std::bind(&NetCostHandler::update_layer_bb_, this, std::placeholders::_1, std::placeholders::_2,
                                        std::placeholders::_3, std::placeholders::_4);
         get_net_bb_cost_functor_ = std::bind(&NetCostHandler::get_net_per_layer_bb_cost_, this, std::placeholders::_1, /*use_ts=*/true);
@@ -252,20 +252,21 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() {
                                                          });
 }
 
-std::pair<double, double> NetCostHandler::comp_bb_cost(e_cost_methods method) {
-    return comp_bb_cost_functor_(method);
+std::tuple<double, double, double> NetCostHandler::comp_bb_cong_cost(e_cost_methods method) {
+    return comp_bb_cong_cost_functor_(method);
 }
 
-std::pair<double, double> NetCostHandler::comp_cube_bb_cost_(e_cost_methods method) {
+std::tuple<double, double, double> NetCostHandler::comp_cube_bb_cong_cost_(e_cost_methods method) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
 
-    double cost = 0;
-    double expected_wirelength = 0.0;
+    double bb_cost = 0.;
+    double expected_wirelength = 0.;
+    double cong_cost = 0.;
 
-    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */
-        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {   /* Do only if not ignored. */
-            /* Small nets don't use incremental updating on their bounding boxes, *
-             * so they can use a fast bounding box calculator.                    */
+    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+            /* Small nets don't use incremental updating on their bounding boxes,
+             * so they can use a fast bounding box calculator. */
             if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) {
                 get_bb_from_scratch_(net_id, /*use_ts=*/false);
             } else {
@@ -273,21 +274,35 @@ std::pair<double, double> NetCostHandler::comp_cube_bb_cost_(e_cost_methods meth
             }
 
             net_cost_[net_id] = get_net_cube_bb_cost_(net_id, /*use_ts=*/false);
-            cost += net_cost_[net_id];
+            bb_cost += net_cost_[net_id];
             if (method == e_cost_methods::CHECK) {
                 expected_wirelength += get_net_wirelength_estimate_(net_id);
             }
         }
     }
 
-    return {cost, expected_wirelength};
+    // Now that all bounding boxes are computed from scratch, we recompute the channel utilization
+    estimate_routing_chann_util();
+
+    // Compute congestion cost using recomputed bounding boxes and channel utilization map
+    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+            net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false);
+            cong_cost += net_cong_cost_[net_id];
+        }
+    }
+
+
+    return {bb_cost, expected_wirelength, cong_cost};
 }
 
-std::pair<double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods method) {
+std::tuple<double, double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods method) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
 
-    double cost = 0;
-    double expected_wirelength = 0.0;
+    double cost = 0.;
+    double expected_wirelength = 0.;
+    // TODO: compute congestion cost
+    constexpr double cong_cost = 0.;
 
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {   /* Do only if not ignored. */
@@ -310,7 +325,9 @@ std::pair<double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods
         }
     }
 
-    return {cost, expected_wirelength};
+
+
+    return {cost, expected_wirelength, cong_cost};
 }
 
 void NetCostHandler::update_net_bb_(const ClusterNetId net,
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index e94990461c8..0c76eaf6975 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -62,7 +62,7 @@ class NetCostHandler {
      *
      * @note The returned estimated wirelength is valid only when method == CHECK
      */
-    std::pair<double, double> comp_bb_cost(e_cost_methods method);
+    std::tuple<double, double, double> comp_bb_cong_cost(e_cost_methods method);
 
     /**
      * @brief Find all the nets and pins affected by this swap and update costs.
@@ -139,7 +139,7 @@ class NetCostHandler {
     ///@brief Contains some parameter that determine how the placement cost is computed.
     const t_placer_opts& placer_opts_;
     ///@brief Points to the proper method for computing the bounding box cost from scratch.
-    std::function<std::pair<double, double>(e_cost_methods method)> comp_bb_cost_functor_;
+    std::function<std::tuple<double, double, double>(e_cost_methods method)> comp_bb_cong_cost_functor_;
     ///@brief Points to the proper method for updating the bounding box of a net.
     std::function<void(ClusterNetId net_id, t_physical_tile_loc pin_old_loc, t_physical_tile_loc pin_new_loc, bool is_driver)> update_bb_functor_;
     ///@brief Points to the proper method for getting the bounding box cost of a net
@@ -502,7 +502,7 @@ class NetCostHandler {
      *
      * @note The returned estimated wirelength is valid only when method == CHECK
      */
-    std::pair<double, double> comp_per_layer_bb_cost_(e_cost_methods method);
+    std::tuple<double, double, double> comp_per_layer_bb_cost_(e_cost_methods method);
 
     /**
      * @brief Computes the bounding box from scratch using 3D bounding boxes (cube mode)
@@ -512,7 +512,7 @@ class NetCostHandler {
      *
      * @note The returned estimated wirelength is valid only when method == CHECK
      */
-    std::pair<double, double> comp_cube_bb_cost_(e_cost_methods method);
+    std::tuple<double, double, double> comp_cube_bb_cong_cost_(e_cost_methods method);
 
     /**
      * @brief if "net" is not already stored as an affected net, add it in ts_nets_to_update.
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 2caead2fcbd..f062fb3ab05 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -14,6 +14,7 @@ void t_placer_costs::update_norm_factors() {
     const auto& clustered_nlist = g_vpr_ctx.clustering().clb_nlist;
 
     bb_cost_norm = 1 / bb_cost;
+
     if (congestion_cost > 0.) {
         congestion_cost_norm = 1 / congestion_cost;
     } else {
@@ -23,6 +24,9 @@ void t_placer_costs::update_norm_factors() {
     if (place_algorithm.is_timing_driven()) {
         //Prevent the norm factor from going to infinity
         timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
+    } else {
+        // Timing normalization factor is not used
+        timing_cost_norm = std::numeric_limits<double>::quiet_NaN();
     }
 
     if (noc_enabled) {
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 7ae8f5f384e..3e7684d2c77 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -125,19 +125,18 @@ Placer::Placer(const Netlist<>& net_list,
     }
 
     // Gets initial cost and loads bounding boxes.
-    costs_.bb_cost = net_cost_handler_.comp_bb_cost(e_cost_methods::NORMAL).first;
-    costs_.bb_cost_norm = 1 / costs_.bb_cost;
+    std::tie(costs_.bb_cost, std::ignore, costs_.congestion_cost) = net_cost_handler_.comp_bb_cong_cost(e_cost_methods::NORMAL);
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
         alloc_and_init_timing_objects_(net_list, analysis_opts);
     } else {
         VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
-        // Timing cost and normalization factors are not used
-        constexpr double INVALID_COST = std::numeric_limits<double>::quiet_NaN();
-        costs_.timing_cost = INVALID_COST;
-        costs_.timing_cost_norm = INVALID_COST;
+        // Timing cost is not used
+        costs_.timing_cost = std::numeric_limits<double>::quiet_NaN();;
     }
 
+    costs_.update_norm_factors();
+
     if (noc_opts.noc) {
         VTR_ASSERT(noc_cost_handler_.has_value());
 
@@ -222,8 +221,6 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list,
         write_setup_timing_graph_dot(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"),
                                      *timing_info_, debug_tnode);
     }
-
-    costs_.timing_cost_norm = 1 / costs_.timing_cost;
 }
 
 void Placer::check_place_() {
@@ -264,9 +261,8 @@ void Placer::check_place_() {
 
 int Placer::check_placement_costs_() {
     int error = 0;
-    double timing_cost_check;
 
-    const auto [bb_cost_check, expected_wirelength] = net_cost_handler_.comp_bb_cost(e_cost_methods::CHECK);
+    const auto [bb_cost_check, expected_wirelength, _] = net_cost_handler_.comp_bb_cong_cost(e_cost_methods::CHECK);
 
     if (fabs(bb_cost_check - costs_.bb_cost) > costs_.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) {
         VTR_LOG_ERROR(
@@ -276,6 +272,7 @@ int Placer::check_placement_costs_() {
     }
 
     if (placer_opts_.place_algorithm.is_timing_driven()) {
+        double timing_cost_check;
         comp_td_costs(place_delay_model_.get(), *placer_criticalities_, placer_state_, &timing_cost_check);
         if (fabs(timing_cost_check - costs_.timing_cost) > costs_.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) {
             VTR_LOG_ERROR(

From 57b3c86974e51bec6bf47d65cd39fcec683e3729 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 23 Apr 2025 14:57:07 -0400
Subject: [PATCH 16/66] initialize acc_chanx_util_ and acc_chany_util_ in
 constructor

---
 vpr/src/place/net_cost_handler.cpp | 69 +++++++++++++-----------------
 vpr/src/place/net_cost_handler.h   |  4 ++
 vpr/src/place/placer.cpp           |  2 -
 3 files changed, 34 insertions(+), 41 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 04104c0f5d6..22336c552ea 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -100,7 +100,9 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     : cube_bb_(cube_bb)
     , placer_state_(placer_state)
     , placer_opts_(placer_opts) {
-    const int num_layers = g_vpr_ctx.device().grid.get_num_layers();
+    const auto& device_ctx = g_vpr_ctx.device();
+
+    const int num_layers = device_ctx.grid.get_num_layers();
     const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size();
 
     is_multi_layer_ = num_layers > 1;
@@ -150,6 +152,21 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     bb_update_status_.resize(num_nets, NetUpdateState::NOT_UPDATED_YET);
 
     alloc_and_load_chan_w_factors_for_place_cost_();
+
+    chanx_util_ = vtr::Matrix<double>({{
+                                          device_ctx.grid.width(),     //[0 .. device_ctx.grid.width() - 1] (length of x channel)
+                                          device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels)
+                                      }},
+                                      0);
+
+    chany_util_ = vtr::Matrix<double>({{
+                                          device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels)
+                                          device_ctx.grid.height()     //[0 .. device_ctx.grid.height() - 1] (length of y channel)
+                                      }},
+                                      0);
+
+    acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_util_);
+    acc_chany_util_ = vtr::PrefixSum2D<double>(chany_util_);
 }
 
 void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
@@ -1696,17 +1713,8 @@ void NetCostHandler::estimate_routing_chann_util() {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
 
-    auto chanx_util = vtr::Matrix<double>({{
-                                          device_ctx.grid.width(),     //[0 .. device_ctx.grid.width() - 1] (length of x channel)
-                                          device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels)
-                                      }},
-                                      0);
-
-    auto chany_util = vtr::Matrix<double>({{
-                                          device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels)
-                                          device_ctx.grid.height()     //[0 .. device_ctx.grid.height() - 1] (length of y channel)
-                                      }},
-                                      0);
+    chanx_util_.fill(0.);
+    chany_util_.fill(0.);
 
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
@@ -1725,46 +1733,29 @@ void NetCostHandler::estimate_routing_chann_util() {
 
             for (int x = bb.xmin; x <= bb.xmax; x++) {
                 for (int y = bb.ymin; y <= bb.ymax; y++) {
-                    chanx_util[x][y] += expected_per_x_segment_wl;
-                    chany_util[x][y] += expected_per_y_segment_wl;
+                    chanx_util_[x][y] += expected_per_x_segment_wl;
+                    chany_util_[x][y] += expected_per_y_segment_wl;
                 }
             }
         }
     }
 
-    auto chanx_occ_int = vtr::Matrix<int>({{
-                                              device_ctx.grid.width(),
-                                              device_ctx.grid.height() - 1
-                                          }},
-                                          0);
-
-    auto chany_occ_int = vtr::Matrix<int>({{
-                                              device_ctx.grid.width() - 1,
-                                              device_ctx.grid.height()
-                                          }},
-                                          0);
-
     const t_chan_width& chan_width = device_ctx.chan_width;
 
-    for (size_t x = 0; x < chanx_util.dim_size(0); ++x) {
-        for (size_t y = 0; y < chanx_util.dim_size(1); ++y) {
-            chanx_occ_int[x][y] = static_cast<int>(std::round(chanx_util[x][y]));
-            chanx_util[x][y] /= chan_width.x_list[y];
+    for (size_t x = 0; x < chanx_util_.dim_size(0); ++x) {
+        for (size_t y = 0; y < chanx_util_.dim_size(1); ++y) {
+            chanx_util_[x][y] /= chan_width.x_list[y];
         }
     }
 
-    for (size_t x = 0; x < chany_util.dim_size(0); ++x) {
-        for (size_t y = 0; y < chany_util.dim_size(1); ++y) {
-            chany_occ_int[x][y] = static_cast<int>(std::round(chany_util[x][y]));
-            chany_util[x][y] /= chan_width.y_list[x];
+    for (size_t x = 0; x < chany_util_.dim_size(0); ++x) {
+        for (size_t y = 0; y < chany_util_.dim_size(1); ++y) {
+            chany_util_[x][y] /= chan_width.y_list[x];
         }
     }
 
-    write_channel_occupancy_table("place_chanx_occupancy.txt", chanx_occ_int, device_ctx.chan_width.x_list);
-    write_channel_occupancy_table("place_chany_occupancy.txt", chany_occ_int, device_ctx.chan_width.y_list);
-
-    acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_util);
-    acc_chany_util_ = vtr::PrefixSum2D<double>(chany_util);
+    acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_util_);
+    acc_chany_util_ = vtr::PrefixSum2D<double>(chany_util_);
 }
 
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 0c76eaf6975..38d5dbb0d72 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -246,6 +246,10 @@ class NetCostHandler {
     vtr::PrefixSum2D<double> acc_chany_util_;
 
 
+    vtr::Matrix<double> chanx_util_;
+    vtr::Matrix<double> chany_util_;
+
+
     /**
      * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in 
      * the cross-die-layer direction over a 2D (x,y) region. We don't assume the inter-die connectivity is the same at all (x,y) locations, so we
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 3e7684d2c77..a96ac02c2a1 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -386,8 +386,6 @@ void Placer::place() {
     check_place_();
 
     log_printer_.print_post_placement_stats();
-
-    net_cost_handler_.estimate_routing_chann_util();
 }
 
 void Placer::copy_locs_to_global_state(PlacementContext& place_ctx) {

From 01d917e30e7119e41d7c44ee98ea75543a2c21df Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 23 Apr 2025 15:17:13 -0400
Subject: [PATCH 17/66] add --congestion_acceptance_rate_trigger command line
 option and enable congestion modeling when acceptance rate drops below this
 value

---
 vpr/src/base/SetupVPR.cpp          | 1 +
 vpr/src/base/read_options.cpp      | 5 +++++
 vpr/src/base/read_options.h        | 2 ++
 vpr/src/base/vpr_types.h           | 3 ++-
 vpr/src/place/annealer.cpp         | 8 +++++++-
 vpr/src/place/annealer.h           | 2 ++
 vpr/src/place/net_cost_handler.cpp | 1 +
 7 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index 17a9025497c..ad4d48687d5 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -645,6 +645,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts)
 
     PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff;
     PlacerOpts->congestion_factor = Options.place_congestion_factor;
+    PlacerOpts->congestion_acceptance_rate_trigger = Options.place_congestion_acceptance_rate_trigger;
 
     /* Depends on PlacerOpts->place_algorithm */
     PlacerOpts->delay_offset = Options.place_delay_offset;
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 7dd74d07517..026d7967bbf 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2429,6 +2429,11 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("0.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    place_timing_grp.add_argument(args.place_congestion_acceptance_rate_trigger, "--congestion_acceptance_rate_trigger")
+        .help("To be written")
+        .default_value("0.0")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter")
         .help("Controls how many temperature updates occur between timing analysis during placement")
         .default_value("1")
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 276d4efc9b0..12e89e382ef 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -185,6 +185,8 @@ struct t_options {
     /* Timing-driven placement options only */
     argparse::ArgValue<float> PlaceTimingTradeoff;
     argparse::ArgValue<float> place_congestion_factor;
+    argparse::ArgValue<float> place_congestion_acceptance_rate_trigger;
+
     argparse::ArgValue<int> RecomputeCritIter;
     argparse::ArgValue<int> inner_loop_recompute_divider;
     argparse::ArgValue<int> quench_recompute_divider;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 0b3762d3ca8..ee94f8dada3 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -962,6 +962,7 @@ struct t_placer_opts {
     t_annealing_sched anneal_sched; ///<Placement option annealing schedule
     float timing_tradeoff;
     float congestion_factor;
+    float congestion_acceptance_rate_trigger;
     int place_chan_width;
     enum e_pad_loc_type pad_loc_type;
     std::string constraints_file;
@@ -1028,7 +1029,7 @@ struct t_placer_opts {
 
     e_place_delta_delay_algorithm place_delta_delay_matrix_calculation_method;
 
-    /*
+    /**
      * @brief enables the analytic placer.
      *
      * Once analytic placement is done, the result is passed through the quench phase
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index fe114e19d12..90d253790bb 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -223,7 +223,8 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     , move_stats_file_(nullptr, vtr::fclose)
     , outer_crit_iter_count_(1)
     , blocks_affected_(placer_state.block_locs().size())
-    , quench_started_(false) {
+    , quench_started_(false)
+    , congestion_modeling_started_(false) {
     const auto& device_ctx = g_vpr_ctx.device();
 
     float first_crit_exponent;
@@ -753,6 +754,11 @@ void PlacementAnnealer::placement_inner_loop() {
     // Calculate the success_rate and std_dev of the costs.
     placer_stats_.calc_iteration_stats(costs_, annealing_state_.move_lim);
 
+    if (congestion_modeling_started_ || placer_stats_.success_rate < placer_opts_.congestion_acceptance_rate_trigger) {
+        net_cost_handler_.estimate_routing_chann_util();
+        congestion_modeling_started_ = true;
+    }
+
     // update the RL agent's state
     if (!quench_started_) {
         if (placer_opts_.place_algorithm.is_timing_driven() && placer_opts_.place_agent_multistate && agent_state_ == e_agent_state::EARLY_IN_THE_ANNEAL) {
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 66abceea1e3..07fd4fed3d3 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -328,6 +328,8 @@ class PlacementAnnealer {
     /// Indicates whether the annealer has entered into the quench stage
     bool quench_started_;
 
+    bool congestion_modeling_started_;
+
     void LOG_MOVE_STATS_HEADER();
     void LOG_MOVE_STATS_PROPOSED();
     void LOG_MOVE_STATS_OUTCOME(double delta_cost, double delta_bb_cost, double delta_td_cost, const char* outcome, const char* reason);
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 22336c552ea..a54a38ffb93 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -196,6 +196,7 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
 
         return chan_x_width;
     });
+
     acc_chany_width_ = vtr::PrefixSum1D<int>(grid_width, [&](size_t x) noexcept {
         int chan_y_width = device_ctx.chan_width.y_list[x];
 

From e5a51b4162682a9631672e23725a4542844de430 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 23 Apr 2025 15:51:40 -0400
Subject: [PATCH 18/66] added --congestion_chan_util_threshold command line
 option

---
 vpr/src/base/SetupVPR.cpp          | 1 +
 vpr/src/base/read_options.cpp      | 5 +++++
 vpr/src/base/read_options.h        | 1 +
 vpr/src/base/vpr_types.h           | 1 +
 vpr/src/place/net_cost_handler.cpp | 4 ++--
 5 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index ad4d48687d5..06484a41a6c 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -646,6 +646,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts)
     PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff;
     PlacerOpts->congestion_factor = Options.place_congestion_factor;
     PlacerOpts->congestion_acceptance_rate_trigger = Options.place_congestion_acceptance_rate_trigger;
+    PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold;
 
     /* Depends on PlacerOpts->place_algorithm */
     PlacerOpts->delay_offset = Options.place_delay_offset;
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 026d7967bbf..646e419f1af 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2434,6 +2434,11 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("0.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    place_timing_grp.add_argument(args.place_congestion_chan_util_threshold, "--congestion_chan_util_threshold")
+        .help("To be written")
+        .default_value("1.0")
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter")
         .help("Controls how many temperature updates occur between timing analysis during placement")
         .default_value("1")
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 12e89e382ef..4a3a8b0c05b 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -186,6 +186,7 @@ struct t_options {
     argparse::ArgValue<float> PlaceTimingTradeoff;
     argparse::ArgValue<float> place_congestion_factor;
     argparse::ArgValue<float> place_congestion_acceptance_rate_trigger;
+    argparse::ArgValue<float> place_congestion_chan_util_threshold;
 
     argparse::ArgValue<int> RecomputeCritIter;
     argparse::ArgValue<int> inner_loop_recompute_divider;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index ee94f8dada3..b1b03bdbe5c 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -963,6 +963,7 @@ struct t_placer_opts {
     float timing_tradeoff;
     float congestion_factor;
     float congestion_acceptance_rate_trigger;
+    float congestion_chan_util_threshold;
     int place_chan_width;
     enum e_pad_loc_type pad_loc_type;
     std::string constraints_file;
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index a54a38ffb93..04c04a503ca 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1390,7 +1390,7 @@ void NetCostHandler::get_layer_bb_from_scratch_(ClusterNetId net_id,
 
 double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
     // Finds the cost due to one net by looking at its coordinate bounding box.
-    auto& cluster_ctx = g_vpr_ctx.clustering();
+    const auto& cluster_ctx = g_vpr_ctx.clustering();
 
     const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id];
 
@@ -1425,7 +1425,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
 double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) {
     auto [x_chan_cong, y_chan_cong] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
 
-    constexpr float threshold = 0.5f;
+    const float threshold = placer_opts_.congestion_chan_util_threshold;
 
     x_chan_cong = (x_chan_cong < threshold) ? 0.0f : x_chan_cong - threshold;
     y_chan_cong = (y_chan_cong < threshold) ? 0.0f : y_chan_cong - threshold;

From cd6215a55f2e8378e4949f565a39f17e782c5d1c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 24 Apr 2025 11:13:18 -0400
Subject: [PATCH 19/66] make find_subtile_in_location() definition static

---
 vpr/src/place/initial_placement.cpp | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp
index aac91e0fd65..21ebdfa2570 100644
--- a/vpr/src/place/initial_placement.cpp
+++ b/vpr/src/place/initial_placement.cpp
@@ -366,11 +366,11 @@ static bool is_loc_legal(const t_pl_loc& loc,
     return legal;
 }
 
-bool find_subtile_in_location(t_pl_loc& centroid,
-                              t_logical_block_type_ptr block_type,
-                              const BlkLocRegistry& blk_loc_registry,
-                              const PartitionRegion& pr,
-                              vtr::RngContainer& rng) {
+static bool find_subtile_in_location(t_pl_loc& centroid,
+                                     t_logical_block_type_ptr block_type,
+                                     const BlkLocRegistry& blk_loc_registry,
+                                     const PartitionRegion& pr,
+                                     vtr::RngContainer& rng) {
     //check if the location is on chip and legal, if yes try to update subtile
     if (is_loc_on_chip({centroid.x, centroid.y, centroid.layer}) && is_loc_legal(centroid, pr, block_type)) {
         //find the compatible subtiles

From 8b395aaea29cb07cb877ebde66c35bca875d50b7 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 24 Apr 2025 11:18:01 -0400
Subject: [PATCH 20/66] include the last column and row in chan?_util_

---
 vpr/src/place/net_cost_handler.cpp | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 04c04a503ca..7e8604477dc 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -111,10 +111,10 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     if (cube_bb_) {
         ts_bb_edge_new_.resize(num_nets, t_bb());
         ts_bb_coord_new_.resize(num_nets, t_bb());
-        ts_avg_chann_util_new_.resize(num_nets);
+        ts_avg_chann_util_new_.resize(num_nets, {0., 0.});
 
         bb_coords_.resize(num_nets, t_bb());
-        avg_chann_util_.resize(num_nets);
+        avg_chann_util_.resize(num_nets, {0., 0.});
 
         bb_num_on_edges_.resize(num_nets, t_bb());
         comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1);
@@ -155,12 +155,12 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
 
     chanx_util_ = vtr::Matrix<double>({{
                                           device_ctx.grid.width(),     //[0 .. device_ctx.grid.width() - 1] (length of x channel)
-                                          device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels)
+                                          device_ctx.grid.height()     //[0 .. device_ctx.grid.height() - 1] (# x channels)
                                       }},
                                       0);
 
     chany_util_ = vtr::Matrix<double>({{
-                                          device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels)
+                                          device_ctx.grid.width(),     //[0 .. device_ctx.grid.width() - 1] (# y channels)
                                           device_ctx.grid.height()     //[0 .. device_ctx.grid.height() - 1] (length of y channel)
                                       }},
                                       0);

From a41a256e0e371c0e4d065d653bb4826f85f9cd64 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 24 Apr 2025 12:21:33 -0400
Subject: [PATCH 21/66] recompute congestion cost when estimating channel
 utilization

---
 vpr/src/place/annealer.cpp         |  6 ++++++
 vpr/src/place/net_cost_handler.cpp | 19 +++++++++++++++----
 vpr/src/place/net_cost_handler.h   |  2 +-
 3 files changed, 22 insertions(+), 5 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 90d253790bb..0d0abd63781 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -671,6 +671,12 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
         outer_crit_iter_count_++;
     }
 
+    if (congestion_modeling_started_
+        || (placer_stats_.success_rate < placer_opts_.congestion_acceptance_rate_trigger && placer_stats_.av_cost != 0.)) {
+        costs_.congestion_cost = net_cost_handler_.estimate_routing_chann_util();
+        congestion_modeling_started_ = true;
+    }
+
     // Update the cost normalization factors
     costs_.update_norm_factors();
 
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 7e8604477dc..751a7e48b56 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -300,7 +300,7 @@ std::tuple<double, double, double> NetCostHandler::comp_cube_bb_cong_cost_(e_cos
     }
 
     // Now that all bounding boxes are computed from scratch, we recompute the channel utilization
-    estimate_routing_chann_util();
+//    estimate_routing_chann_util();
 
     // Compute congestion cost using recomputed bounding boxes and channel utilization map
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
@@ -1564,8 +1564,8 @@ std::pair<double, double> NetCostHandler::recompute_bb_cong_cost_() {
 }
 
 static double wirelength_crossing_count(size_t fanout) {
-    /* Get the expected "crossing count" of a net, based on its number *
-     * of pins.  Extrapolate for very large nets.                      */
+    /* Get the expected "crossing count" of a net, based on its number
+     * of pins.  Extrapolate for very large nets. */
 
     if (fanout > MAX_FANOUT_CROSSING_COUNT) {
         return 2.7933 + 0.02616 * (fanout - MAX_FANOUT_CROSSING_COUNT);
@@ -1710,7 +1710,7 @@ double NetCostHandler::get_total_wirelength_estimate() const {
     return estimated_wirelength;
 }
 
-void NetCostHandler::estimate_routing_chann_util() {
+double NetCostHandler::estimate_routing_chann_util() {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& device_ctx = g_vpr_ctx.device();
 
@@ -1757,6 +1757,17 @@ void NetCostHandler::estimate_routing_chann_util() {
 
     acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_util_);
     acc_chany_util_ = vtr::PrefixSum2D<double>(chany_util_);
+
+    double cong_cost = 0.;
+    // Compute congestion cost using recomputed bounding boxes and channel utilization map
+    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+            net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false);
+            cong_cost += net_cong_cost_[net_id];
+        }
+    }
+
+    return cong_cost;
 }
 
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 38d5dbb0d72..223e3d87d5a 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -127,7 +127,7 @@ class NetCostHandler {
      */
     double get_total_wirelength_estimate() const;
 
-    void estimate_routing_chann_util();
+    double estimate_routing_chann_util();
 
   private:
     ///@brief Specifies whether the bounding box is computed using cube method or per-layer method.

From 7f6c4962b6ee31dcd70baccf31477d1b443c7476 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 1 May 2025 12:24:33 -0400
Subject: [PATCH 22/66] weigh congestion cost along x/y axis with the length of
 bb along that dimension

---
 vpr/src/place/net_cost_handler.cpp | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 751a7e48b56..cdd220bc61c 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1423,14 +1423,19 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
 }
 
 double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) {
-    auto [x_chan_cong, y_chan_cong] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
+    const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
+
+    const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id];
+
+    int distance_x = bb.xmax - bb.xmin + 1;
+    int distance_y = bb.ymax - bb.ymin + 1;
 
     const float threshold = placer_opts_.congestion_chan_util_threshold;
 
-    x_chan_cong = (x_chan_cong < threshold) ? 0.0f : x_chan_cong - threshold;
-    y_chan_cong = (y_chan_cong < threshold) ? 0.0f : y_chan_cong - threshold;
+    float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold;
+    float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold;
 
-    return x_chan_cong + y_chan_cong;
+    return (distance_x * x_chan_cong) + (distance_y * y_chan_cong);
 }
 
 double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) {

From 5f11b9b7d3d79ac0ee25eb8d970998ac52d79bdc Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 16 May 2025 15:00:44 -0400
Subject: [PATCH 23/66] cherrypick calculate_channel_width();

---
 vpr/src/base/stats.cpp | 35 +++++++++++++++++++++++++++++++++++
 vpr/src/base/stats.h   |  2 ++
 2 files changed, 37 insertions(+)

diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp
index 041551f885d..f9eb33e31ce 100644
--- a/vpr/src/base/stats.cpp
+++ b/vpr/src/base/stats.cpp
@@ -113,6 +113,41 @@ void routing_stats(const Netlist<>& net_list,
     }
 }
 
+std::pair<vtr::NdMatrix<int, 3>, vtr::NdMatrix<int, 3>> calculate_channel_width() {
+    const auto& device_ctx = g_vpr_ctx.device();
+    const auto& rr_graph = device_ctx.rr_graph;
+
+    auto chanx_width = vtr::NdMatrix<int, 3>({{(size_t)device_ctx.grid.get_num_layers(),
+                                               device_ctx.grid.width(),
+                                               device_ctx.grid.height()}},
+                                             0);
+
+    auto chany_width = vtr::NdMatrix<int, 3>({{(size_t)device_ctx.grid.get_num_layers(),
+                                               device_ctx.grid.width(),
+                                               device_ctx.grid.height()}},
+                                             0);
+
+    for (RRNodeId node_id : rr_graph.nodes()) {
+        e_rr_type rr_type = rr_graph.node_type(node_id);
+
+        if (rr_type == e_rr_type::CHANX) {
+            int y = rr_graph.node_ylow(node_id);
+            int layer = rr_graph.node_layer(node_id);
+            for (int x = rr_graph.node_xlow(node_id); x <= rr_graph.node_xhigh(node_id); x++) {
+                chanx_width[layer][x][y]++;
+            }
+        } else if (rr_type == e_rr_type::CHANY) {
+            int x = rr_graph.node_xlow(node_id);
+            int layer = rr_graph.node_layer(node_id);
+            for (int y = rr_graph.node_ylow(node_id); y <= rr_graph.node_yhigh(node_id); y++) {
+                chany_width[layer][x][y]++;
+            }
+        }
+    }
+
+    return {chanx_width, chany_width};
+}
+
 void length_and_bends_stats(const Netlist<>& net_list, bool is_flat) {
     int max_bends = 0;
     int total_bends = 0;
diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h
index 4f7a3017c5f..48c0bd4c4e9 100644
--- a/vpr/src/base/stats.h
+++ b/vpr/src/base/stats.h
@@ -22,6 +22,8 @@ void routing_stats(const Netlist<>& net_list,
                    int wire_to_ipin_switch,
                    bool is_flat);
 
+std::pair<vtr::NdMatrix<int, 3>, vtr::NdMatrix<int, 3>> calculate_channel_width();
+
 void print_wirelen_prob_dist(bool is_flat);
 
 void print_lambda();

From b1e78f032999e1b875dde313e423b5532750f56f Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 16 May 2025 15:07:52 -0400
Subject: [PATCH 24/66] calculate channel utilization by using channel width
 info extracted from RR graph

---
 vpr/src/place/net_cost_handler.cpp | 21 ++++++++++++++++++---
 vpr/src/place/net_cost_handler.h   |  4 +++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index cdd220bc61c..3c0c23781f9 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1746,17 +1746,32 @@ double NetCostHandler::estimate_routing_chann_util() {
         }
     }
 
-    const t_chan_width& chan_width = device_ctx.chan_width;
+//    const t_chan_width& chan_width = device_ctx.chan_width;
+
+    if (chanx_width_.empty()) {
+        VTR_ASSERT(chany_width_.empty());
+        std::tie(chanx_width_, chany_width_) = calculate_channel_width();
+    }
+
 
     for (size_t x = 0; x < chanx_util_.dim_size(0); ++x) {
         for (size_t y = 0; y < chanx_util_.dim_size(1); ++y) {
-            chanx_util_[x][y] /= chan_width.x_list[y];
+            if (chanx_width_[0][x][y] > 0) {
+                chanx_util_[x][y] /= chanx_width_[0][x][y];
+            } else {
+                chanx_util_[x][y] = 1.;
+            }
+
         }
     }
 
     for (size_t x = 0; x < chany_util_.dim_size(0); ++x) {
         for (size_t y = 0; y < chany_util_.dim_size(1); ++y) {
-            chany_util_[x][y] /= chan_width.y_list[x];
+            if (chany_width_[0][x][y] > 0) {
+                chany_util_[x][y] /= chany_width_[0][x][y];
+            } else {
+                chany_util_[x][y] = 1.;
+            }
         }
     }
 
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 223e3d87d5a..3ec31724911 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -245,10 +245,12 @@ class NetCostHandler {
     vtr::PrefixSum2D<double> acc_chanx_util_;
     vtr::PrefixSum2D<double> acc_chany_util_;
 
-
     vtr::Matrix<double> chanx_util_;
     vtr::Matrix<double> chany_util_;
 
+    vtr::NdMatrix<int, 3> chanx_width_;
+    vtr::NdMatrix<int, 3> chany_width_;
+
 
     /**
      * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in 

From 3d8941737cf8b670656517806951507a6f8ae25c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Fri, 23 May 2025 18:46:37 -0400
Subject: [PATCH 25/66] remove distance factor when computing congestion cost
 for each net

---
 vpr/src/place/net_cost_handler.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 3c0c23781f9..002b5d849cf 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1435,7 +1435,8 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts)
     float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold;
     float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold;
 
-    return (distance_x * x_chan_cong) + (distance_y * y_chan_cong);
+//    return (distance_x * x_chan_cong) + (distance_y * y_chan_cong);
+    return  x_chan_cong + y_chan_cong;
 }
 
 double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) {

From 898c73d1997c429a080254c299dfedd3bababb14 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 27 May 2025 17:43:20 -0400
Subject: [PATCH 26/66] re-normalize timing-tradeoff

---
 vpr/src/place/annealer.cpp   | 25 +++++++++++++++++--------
 vpr/src/place/annealer.h     |  4 +++-
 vpr/src/place/place_util.cpp |  4 ++--
 3 files changed, 22 insertions(+), 11 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 0d0abd63781..1938b456c79 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -227,6 +227,9 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     , congestion_modeling_started_(false) {
     const auto& device_ctx = g_vpr_ctx.device();
 
+    congestion_factor_ = placer_opts_.congestion_factor;
+    placer_opts_.congestion_factor = 0.;
+
     float first_crit_exponent;
     if (placer_opts.place_algorithm.is_timing_driven()) {
         first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
@@ -467,7 +470,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
                            placer_opts_.timing_tradeoff,
                            timing_delta_c,
                            costs_.timing_cost_norm);
-            delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
+            delta_c = (1 - placer_opts_.timing_tradeoff - placer_opts_.congestion_factor) * bb_delta_c * costs_.bb_cost_norm
                       + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm
                       + placer_opts_.congestion_factor * congestion_delta_c * costs_.congestion_cost_norm;
         } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
@@ -672,9 +675,20 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
     }
 
     if (congestion_modeling_started_
-        || (placer_stats_.success_rate < placer_opts_.congestion_acceptance_rate_trigger && placer_stats_.av_cost != 0.)) {
+        || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_acceptance_rate_trigger) {
         costs_.congestion_cost = net_cost_handler_.estimate_routing_chann_util();
-        congestion_modeling_started_ = true;
+
+
+        if (!congestion_modeling_started_) {
+            VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff);
+            placer_opts_.congestion_factor = congestion_factor_;
+            placer_opts_.congestion_factor /= 1.f + congestion_factor_;
+//            placer_opts_.congestion_factor /= 1.f + placer_opts_.congestion_factor;
+            placer_opts_.timing_tradeoff /= 1.f + congestion_factor_;
+            VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff);
+            congestion_modeling_started_ = true;
+        }
+
     }
 
     // Update the cost normalization factors
@@ -760,11 +774,6 @@ void PlacementAnnealer::placement_inner_loop() {
     // Calculate the success_rate and std_dev of the costs.
     placer_stats_.calc_iteration_stats(costs_, annealing_state_.move_lim);
 
-    if (congestion_modeling_started_ || placer_stats_.success_rate < placer_opts_.congestion_acceptance_rate_trigger) {
-        net_cost_handler_.estimate_routing_chann_util();
-        congestion_modeling_started_ = true;
-    }
-
     // update the RL agent's state
     if (!quench_started_) {
         if (placer_opts_.place_algorithm.is_timing_driven() && placer_opts_.place_agent_multistate && agent_state_ == e_agent_state::EARLY_IN_THE_ANNEAL) {
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 07fd4fed3d3..1733d18d9d5 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -268,7 +268,9 @@ class PlacementAnnealer {
     float estimate_starting_temperature_();
 
   private:
-    const t_placer_opts& placer_opts_;
+    t_placer_opts placer_opts_;
+    float congestion_factor_;
+
     PlacerState& placer_state_;
     const PlaceMacros& place_macros_;
     /// Stores different placement cost terms
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 1f8b2afe29d..f65f00aa5a2 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -42,10 +42,10 @@ double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_
         total_cost = bb_cost * bb_cost_norm;
     } else if (placer_opts.place_algorithm.is_timing_driven()) {
         // in timing mode we include both wirelength and timing costs
-        total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm);
+        total_cost = (1 - placer_opts.timing_tradeoff - placer_opts.congestion_factor) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm);
     }
 
-    total_cost += congestion_cost * congestion_cost_norm;
+    total_cost += placer_opts.congestion_factor * congestion_cost * congestion_cost_norm;
 
     if (noc_opts.noc) {
         // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion

From 46a089e4a2f01f54e5c07c3fa687dd9816a0be80 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 18 Jun 2025 18:11:07 -0400
Subject: [PATCH 27/66] add congestion_modeling_started_ flag to NetCostHandler

---
 vpr/src/place/annealer.h           |   2 +-
 vpr/src/place/net_cost_handler.cpp | 104 +++++++++++++++++------------
 vpr/src/place/net_cost_handler.h   |   1 +
 3 files changed, 64 insertions(+), 43 deletions(-)

diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index db4d4f6bbc9..fbdccf9abef 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -329,7 +329,7 @@ class PlacementAnnealer {
     int tot_iter_;
     /// Indicates whether the annealer has entered into the quench stage
     bool quench_started_;
-
+    /// Indicates whether routing congestion modeling has been started
     bool congestion_modeling_started_;
 
     void LOG_MOVE_STATS_HEADER();
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 002b5d849cf..c114d309c4f 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -97,7 +97,8 @@ static double wirelength_crossing_count(size_t fanout);
 NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
                                PlacerState& placer_state,
                                bool cube_bb)
-    : cube_bb_(cube_bb)
+    : congestion_modeling_started_(false)
+    , cube_bb_(cube_bb)
     , placer_state_(placer_state)
     , placer_opts_(placer_opts) {
     const auto& device_ctx = g_vpr_ctx.device();
@@ -279,7 +280,6 @@ std::tuple<double, double, double> NetCostHandler::comp_cube_bb_cong_cost_(e_cos
 
     double bb_cost = 0.;
     double expected_wirelength = 0.;
-    double cong_cost = 0.;
 
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
@@ -299,18 +299,17 @@ std::tuple<double, double, double> NetCostHandler::comp_cube_bb_cong_cost_(e_cos
         }
     }
 
-    // Now that all bounding boxes are computed from scratch, we recompute the channel utilization
-//    estimate_routing_chann_util();
-
+    double cong_cost = 0.;
     // Compute congestion cost using recomputed bounding boxes and channel utilization map
-    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
-        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
-            net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false);
-            cong_cost += net_cong_cost_[net_id];
+    if (congestion_modeling_started_) {
+        for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+            if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+                net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false);
+                cong_cost += net_cong_cost_[net_id];
+            }
         }
     }
 
-
     return {bb_cost, expected_wirelength, cong_cost};
 }
 
@@ -571,11 +570,12 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts
         num_sink_pin_layer[pin_loc.layer_num]++;
     }
 
-    // the average channel utilization that is going to be updated by this function
-    auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
-    const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
-    x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
-    y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+    if (congestion_modeling_started_) {
+        auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
+        const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
+        x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+        y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+    }
 }
 
 void NetCostHandler::get_non_updatable_per_layer_bb_(ClusterNetId net_id, bool use_ts) {
@@ -883,11 +883,12 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
         bb_update_status_[net_id] = NetUpdateState::UPDATED_ONCE;
     }
 
-    // the average channel utilization that is going to be updated by this function
-    auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id];
-    const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
-    x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
-    y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+    if (congestion_modeling_started_) {
+        auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id];
+        const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
+        x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+        y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+    }
 }
 
 void NetCostHandler::update_layer_bb_(ClusterNetId net_id,
@@ -1323,11 +1324,12 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) {
     num_on_edges.layer_min = layer_min_edge;
     num_on_edges.layer_max = layer_max_edge;
 
-    // the average channel utilization that is going to be updated by this function
-    auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
-    const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1);
-    x_chan_util = acc_chanx_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
-    y_chan_util = acc_chany_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
+    if (congestion_modeling_started_) {
+        auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
+        const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1);
+        x_chan_util = acc_chanx_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
+        y_chan_util = acc_chany_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
+    }
 }
 
 void NetCostHandler::get_layer_bb_from_scratch_(ClusterNetId net_id,
@@ -1423,6 +1425,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
 }
 
 double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) {
+    VTR_ASSERT_SAFE(congestion_modeling_started_);
     const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
 
     const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id];
@@ -1562,7 +1565,10 @@ std::pair<double, double> NetCostHandler::recompute_bb_cong_cost_() {
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
             // Bounding boxes don't have to be recomputed; they're correct.
             bb_cost += net_cost_[net_id];
-            cong_cost += net_cong_cost_[net_id];
+
+            if (congestion_modeling_started_) {
+                cong_cost += net_cong_cost_[net_id];
+            }
         }
     }
 
@@ -1585,10 +1591,12 @@ void NetCostHandler::set_bb_delta_cost_(double& bb_delta_c, double& congestion_d
         ClusterNetId net_id = ts_net;
 
         proposed_net_cost_[net_id] = get_net_bb_cost_functor_(net_id);
-        proposed_net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/true);
-
         bb_delta_c += proposed_net_cost_[net_id] - net_cost_[net_id];
-        congestion_delta_c += proposed_net_cong_cost_[net_id] - net_cong_cost_[net_id];
+
+        if (congestion_modeling_started_) {
+            proposed_net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/true);
+            congestion_delta_c += proposed_net_cong_cost_[net_id] - net_cong_cost_[net_id];
+        }
     }
 }
 
@@ -1600,6 +1608,7 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel*
                                                          double& congestion_delta_c) {
     VTR_ASSERT_DEBUG(bb_delta_c == 0.);
     VTR_ASSERT_DEBUG(timing_delta_c == 0.);
+    VTR_ASSERT_DEBUG(congestion_delta_c == 0.);
     const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
 
     ts_nets_to_update_.resize(0);
@@ -1649,21 +1658,27 @@ void NetCostHandler::update_move_nets() {
         }
 
         net_cost_[net_id] = proposed_net_cost_[net_id];
-        net_cong_cost_[net_id] = proposed_net_cong_cost_[net_id];
-
-        /* negative proposed_net_cost value is acting as a flag to mean not computed yet. */
+        // negative proposed_net_cost value is acting as a flag to mean not computed yet.
         proposed_net_cost_[net_id] = -1;
-        proposed_net_cong_cost_[net_id] = -1;
+
+        if (congestion_modeling_started_) {
+            net_cong_cost_[net_id] = proposed_net_cong_cost_[net_id];
+            proposed_net_cong_cost_[net_id] = -1;
+        }
+
         bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET;
     }
 }
 
 void NetCostHandler::reset_move_nets() {
-    /* Reset the net cost function flags first. */
-    for (const ClusterNetId ts_net : ts_nets_to_update_) {
-        ClusterNetId net_id = ts_net;
+    // Reset the net cost function flags first.
+    for (const ClusterNetId net_id : ts_nets_to_update_) {
         proposed_net_cost_[net_id] = -1;
-        proposed_net_cong_cost_[net_id] = -1;
+
+        if (congestion_modeling_started_) {
+            proposed_net_cong_cost_[net_id] = -1;
+        }
+
         bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET;
     }
 }
@@ -1682,11 +1697,16 @@ void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_m
         }
     };
 
-    auto[new_bb_cost, new_cong_cost] = recompute_bb_cong_cost_();
+    auto [new_bb_cost, new_cong_cost] = recompute_bb_cong_cost_();
     check_and_print_cost(new_bb_cost, costs.bb_cost, "bb_cost");
-    check_and_print_cost(new_cong_cost, costs.congestion_cost, "cong_cost");
     costs.bb_cost = new_bb_cost;
-    costs.congestion_cost = new_cong_cost;
+
+    if (congestion_modeling_started_) {
+        check_and_print_cost(new_cong_cost, costs.congestion_cost, "cong_cost");
+        costs.congestion_cost = new_cong_cost;
+    } else {
+        costs.congestion_cost = 0.;
+    }
 
     if (placer_opts_.place_algorithm.is_timing_driven()) {
         double new_timing_cost = 0.;
@@ -1747,8 +1767,6 @@ double NetCostHandler::estimate_routing_chann_util() {
         }
     }
 
-//    const t_chan_width& chan_width = device_ctx.chan_width;
-
     if (chanx_width_.empty()) {
         VTR_ASSERT(chany_width_.empty());
         std::tie(chanx_width_, chany_width_) = calculate_channel_width();
@@ -1779,6 +1797,8 @@ double NetCostHandler::estimate_routing_chann_util() {
     acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_util_);
     acc_chany_util_ = vtr::PrefixSum2D<double>(chany_util_);
 
+    congestion_modeling_started_ = true;
+
     double cong_cost = 0.;
     // Compute congestion cost using recomputed bounding boxes and channel utilization map
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 7e88086b3c4..b67c9c95464 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -129,6 +129,7 @@ class NetCostHandler {
     double estimate_routing_chann_util();
 
   private:
+    bool congestion_modeling_started_;
     ///@brief Specifies whether the bounding box is computed using cube method or per-layer method.
     bool cube_bb_;
     ///@brief Determines whether the FPGA has multiple dies (layers)

From 90addfb93173fc064027be2dfc0d3b64ad9cb02f Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 18 Jun 2025 18:12:30 -0400
Subject: [PATCH 28/66] make format

---
 vpr/src/base/stats.cpp             |  4 ++--
 vpr/src/base/stats.h               |  2 +-
 vpr/src/place/annealer.cpp         |  4 +---
 vpr/src/place/net_cost_handler.cpp | 16 ++++++----------
 vpr/src/place/net_cost_handler.h   |  1 -
 vpr/src/place/placer.cpp           |  2 +-
 6 files changed, 11 insertions(+), 18 deletions(-)

diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp
index df5d0e99728..79e507c27d7 100644
--- a/vpr/src/base/stats.cpp
+++ b/vpr/src/base/stats.cpp
@@ -267,8 +267,8 @@ static void get_channel_occupancy_stats(const Netlist<>& net_list, bool /***/) {
 }
 
 void write_channel_occupancy_table(const std::string_view filename,
-                                          const vtr::Matrix<int>& occupancy,
-                                          const std::vector<int>& capacity_list) {
+                                   const vtr::Matrix<int>& occupancy,
+                                   const std::vector<int>& capacity_list) {
     constexpr int w_coord = 6;
     constexpr int w_value = 12;
     constexpr int w_percent = 12;
diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h
index 5fc00f0009a..93643384beb 100644
--- a/vpr/src/base/stats.h
+++ b/vpr/src/base/stats.h
@@ -69,4 +69,4 @@ void print_device_utilization(const float target_device_utilization);
  */
 void write_channel_occupancy_table(const std::string_view filename,
                                    const vtr::Matrix<int>& occupancy,
-                                   const std::vector<int>& capacity_list);
\ No newline at end of file
+                                   const std::vector<int>& capacity_list);
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index a94941d96c9..f91a4a18d62 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -682,17 +682,15 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
         || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_acceptance_rate_trigger) {
         costs_.congestion_cost = net_cost_handler_.estimate_routing_chann_util();
 
-
         if (!congestion_modeling_started_) {
             VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff);
             placer_opts_.congestion_factor = congestion_factor_;
             placer_opts_.congestion_factor /= 1.f + congestion_factor_;
-//            placer_opts_.congestion_factor /= 1.f + placer_opts_.congestion_factor;
+            //            placer_opts_.congestion_factor /= 1.f + placer_opts_.congestion_factor;
             placer_opts_.timing_tradeoff /= 1.f + congestion_factor_;
             VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff);
             congestion_modeling_started_ = true;
         }
-
     }
 
     // Update the cost normalization factors
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index c114d309c4f..7a3f24b962a 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -155,14 +155,14 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     alloc_and_load_chan_w_factors_for_place_cost_();
 
     chanx_util_ = vtr::Matrix<double>({{
-                                          device_ctx.grid.width(),     //[0 .. device_ctx.grid.width() - 1] (length of x channel)
-                                          device_ctx.grid.height()     //[0 .. device_ctx.grid.height() - 1] (# x channels)
+                                          device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (length of x channel)
+                                          device_ctx.grid.height() //[0 .. device_ctx.grid.height() - 1] (# x channels)
                                       }},
                                       0);
 
     chany_util_ = vtr::Matrix<double>({{
-                                          device_ctx.grid.width(),     //[0 .. device_ctx.grid.width() - 1] (# y channels)
-                                          device_ctx.grid.height()     //[0 .. device_ctx.grid.height() - 1] (length of y channel)
+                                          device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (# y channels)
+                                          device_ctx.grid.height() //[0 .. device_ctx.grid.height() - 1] (length of y channel)
                                       }},
                                       0);
 
@@ -342,8 +342,6 @@ std::tuple<double, double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cos
         }
     }
 
-
-
     return {cost, expected_wirelength, cong_cost};
 }
 
@@ -1438,8 +1436,8 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts)
     float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold;
     float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold;
 
-//    return (distance_x * x_chan_cong) + (distance_y * y_chan_cong);
-    return  x_chan_cong + y_chan_cong;
+    //    return (distance_x * x_chan_cong) + (distance_y * y_chan_cong);
+    return x_chan_cong + y_chan_cong;
 }
 
 double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) {
@@ -1772,7 +1770,6 @@ double NetCostHandler::estimate_routing_chann_util() {
         std::tie(chanx_width_, chany_width_) = calculate_channel_width();
     }
 
-
     for (size_t x = 0; x < chanx_util_.dim_size(0); ++x) {
         for (size_t y = 0; y < chanx_util_.dim_size(1); ++y) {
             if (chanx_width_[0][x][y] > 0) {
@@ -1780,7 +1777,6 @@ double NetCostHandler::estimate_routing_chann_util() {
             } else {
                 chanx_util_[x][y] = 1.;
             }
-
         }
     }
 
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index b67c9c95464..1d83b7ae5fa 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -251,7 +251,6 @@ class NetCostHandler {
     vtr::NdMatrix<int, 3> chanx_width_;
     vtr::NdMatrix<int, 3> chany_width_;
 
-
     /**
      * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in 
      * the cross-die-layer direction over a 2D (x,y) region. We don't assume the inter-die connectivity is the same at all (x,y) locations, so we
diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp
index 944d8bcbb57..1fe8e837e3b 100644
--- a/vpr/src/place/placer.cpp
+++ b/vpr/src/place/placer.cpp
@@ -123,7 +123,7 @@ Placer::Placer(const Netlist<>& net_list,
     } else {
         VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE);
         // Timing cost is not used
-        costs_.timing_cost = std::numeric_limits<double>::quiet_NaN();;
+        costs_.timing_cost = std::numeric_limits<double>::quiet_NaN();
     }
 
     costs_.update_norm_factors();

From 08d48b0ab46721d1890662530bad8abbddc43f77 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Wed, 2 Jul 2025 17:31:46 -0400
Subject: [PATCH 29/66] make format; use local grid-size constants in NetCostHandler

---
 vpr/src/place/net_cost_handler.cpp | 66 ++++++++++++++++--------------
 1 file changed, 35 insertions(+), 31 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index bf304b762bd..64cb2c7287b 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -96,7 +96,9 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     , placer_opts_(placer_opts) {
     const auto& device_ctx = g_vpr_ctx.device();
 
-    const int num_layers = device_ctx.grid.get_num_layers();
+    const size_t grid_width = device_ctx.grid.width();
+    const size_t grid_height = device_ctx.grid.height();
+    const size_t num_layers = device_ctx.grid.get_num_layers();
     const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size();
 
     is_multi_layer_ = num_layers > 1;
@@ -147,25 +149,20 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
 
     alloc_and_load_chan_w_factors_for_place_cost_();
 
-    chanx_util_ = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(),
-                                             device_ctx.grid.width(),
-                                             device_ctx.grid.height()}},
-                                           0);
-
-    chany_util_ = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(),
-                                             device_ctx.grid.width(),
-                                             device_ctx.grid.height()}},
-                                           0);
-
-    acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_util_.dim_size(1), chanx_util_.dim_size(2),
-        [&](size_t x, size_t y) -> double {
-            return chanx_util_[0][x][y];
-        }, 0);
-
-    acc_chany_util_ = vtr::PrefixSum2D<double>(chany_util_.dim_size(1), chany_util_.dim_size(2),
-        [&](size_t x, size_t y) -> double {
-            return chany_util_[0][x][y];
-        }, 0);
+    chanx_util_ = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
+    chany_util_ = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
+
+    acc_chanx_util_ = vtr::PrefixSum2D<double>(grid_width,
+                                               grid_height,
+                                               [&](size_t x, size_t y) {
+                                                   return chanx_util_[0][x][y];
+                                               });
+
+    acc_chany_util_ = vtr::PrefixSum2D<double>(grid_width,
+                                               grid_height,
+                                               [&](size_t x, size_t y) {
+                                                   return chany_util_[0][x][y];
+                                               });
 }
 
 void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
@@ -1426,8 +1423,8 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts)
 
     const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id];
 
-//    int distance_x = bb.xmax - bb.xmin + 1;
-//    int distance_y = bb.ymax - bb.ymin + 1;
+    //    int distance_x = bb.xmax - bb.xmin + 1;
+    //    int distance_y = bb.ymax - bb.ymin + 1;
 
     const float threshold = placer_opts_.congestion_chan_util_threshold;
 
@@ -1730,6 +1727,11 @@ double NetCostHandler::get_total_wirelength_estimate() const {
 
 double NetCostHandler::estimate_routing_chan_util() {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
+    const DeviceContext& device_ctx = g_vpr_ctx.device();
+
+    const size_t grid_width = device_ctx.grid.width();
+    const size_t grid_height = device_ctx.grid.height();
+    const size_t num_layers = device_ctx.grid.get_num_layers();
 
     chanx_util_.fill(0.);
     chany_util_.fill(0.);
@@ -1831,15 +1833,17 @@ double NetCostHandler::estimate_routing_chan_util() {
 
     // For now, congestion modeling in the placement stage is limited to a single die
     // TODO: extend it to multiple dice
-    acc_chanx_util_ = vtr::PrefixSum2D<double>(chanx_util_.dim_size(1), chanx_util_.dim_size(2),
-        [&](size_t x, size_t y) -> double {
-            return chanx_util_[0][x][y];
-        }, 0);
-
-    acc_chany_util_ = vtr::PrefixSum2D<double>(chany_util_.dim_size(1), chany_util_.dim_size(2),
-        [&](size_t x, size_t y) -> double {
-            return chany_util_[0][x][y];
-        }, 0);
+    acc_chanx_util_ = vtr::PrefixSum2D<double>(grid_width,
+                                               grid_height,
+                                               [&](size_t x, size_t y) {
+                                                   return chanx_util_[0][x][y];
+                                               });
+
+    acc_chany_util_ = vtr::PrefixSum2D<double>(grid_width,
+                                               grid_height,
+                                               [&](size_t x, size_t y) {
+                                                   return chany_util_[0][x][y];
+                                               });
 
     congestion_modeling_started_ = true;
 

From 4c635b8784d654b39479f05429db1b1b91a37398 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 3 Jul 2025 11:18:51 -0400
Subject: [PATCH 30/66] add doxygen comments for congestion cost terms

---
 vpr/src/place/place_util.cpp |  2 +-
 vpr/src/place/place_util.h   | 76 +++++++++++++++++-------------------
 2 files changed, 37 insertions(+), 41 deletions(-)

diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index f65f00aa5a2..9b839e646b8 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -22,7 +22,7 @@ void t_placer_costs::update_norm_factors() {
     }
 
     if (place_algorithm.is_timing_driven()) {
-        //Prevent the norm factor from going to infinity
+        // Prevent the norm factor from going to infinity
         timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST);
     } else {
         // Timing normalization factor is not used
diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h
index c3d6a6669d4..0caa10b8d57 100644
--- a/vpr/src/place/place_util.h
+++ b/vpr/src/place/place_util.h
@@ -63,39 +63,33 @@ struct NocCostTerms {
  * values of the previous iteration. However, the divisions are expensive,
  * so we store their multiplicative inverses when they are updated in
  * the outer loop routines to speed up the normalization process.
- *
- *   @param cost The weighted average of the wiring cost and the timing cost.
- *   @param bb_cost The bounding box cost, aka the wiring cost.
- *   @param timing_cost The timing cost, which is connection delay * criticality.
- *
- *   @param bb_cost_norm The normalization factor for the wiring cost.
- *   @param timing_cost_norm The normalization factor for the timing cost, which
- *              is upper-bounded by the value of MAX_INV_TIMING_COST.
- *
- *   @param noc_cost_terms NoC-related cost terms
- *   @param noc_cost_norm_factors Normalization factors for NoC-related cost terms.
- *
- *   @param MAX_INV_TIMING_COST Stops inverse timing cost from going to infinity
- *              with very lax timing constraints, which avoids multiplying by a
- *              gigantic timing_cost_norm when auto-normalizing. The exact value
- *              of this cost has relatively little impact, but should be large
- *              enough to not affect the timing costs computation for normal
- *              constraints.
- *
- *   @param place_algorithm Determines how the member values are updated upon
- *              each temperature change during the placer annealing process.
  */
 class t_placer_costs {
   public: //members
+    /// The weighted average of the wiring cost, the timing cost, and the congestion cost (if enabled)
     double cost = 0.;
+
+    /// The bounding box cost, aka the wiring cost.
     double bb_cost = 0.;
+
+    /// The timing cost, which is connection delay * criticality.
     double timing_cost = 0.;
+
+    /// The congestion cost, which estimates how much routing channels are over-utilized.
     double congestion_cost = 0.;
+
+    /// The normalization factor for the wiring cost.
     double bb_cost_norm = 0.;
+
+    /// The normalization factor for the timing cost, which is upper-bounded by the value of MAX_INV_TIMING_COST.
     double timing_cost_norm = 0.;
+
+    /// The normalization factor for the congestion cost.
     double congestion_cost_norm = 0.;
 
+    /// NoC-related cost terms.
     NocCostTerms noc_cost_terms;
+    /// Normalization factors for NoC-related cost terms.
     NocCostTerms noc_cost_norm_factors;
 
   public: //Constructor
@@ -133,7 +127,18 @@ class t_placer_costs {
     t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost);
 
   private:
+    /**
+     * @brief Stops inverse timing cost from going to infinity
+     *         with very lax timing constraints, which avoids multiplying by a
+     *         gigantic timing_cost_norm when auto-normalizing. The exact value
+     *         of this cost has relatively little impact, but should be large
+     *         enough to not affect the timing costs computation for normal
+     *         constraints.
+     */
     static constexpr double MAX_INV_TIMING_COST = 1.e12;
+
+    /// Determines how the member values are updated upon
+    /// each temperature change during the placer annealing process.
     t_place_algorithm place_algorithm;
     bool noc_enabled;
 };
@@ -150,39 +155,30 @@ class t_placer_costs {
  * In terms of calculating statistics for total cost, we mean that we
  * operate upon the set of placer cost values gathered after every
  * accepted block move.
- *
- *   @param av_cost
- *              Average total cost. Cost formulation depends on
- *              the place algorithm currently being used.
- *   @param av_bb_cost
- *              Average bounding box (wiring) cost.
- *   @param av_timing_cost
- *              Average timing cost (delay * criticality).
- *   @param sum_of_squares
- *              Sum of squares of the total cost.
- *   @param success_num
- *              Number of accepted block swaps for the current iteration.
- *   @param success_rate
- *              num_accepted / total_trials for the current iteration.
- *   @param std_dev
- *              Standard deviation of the total cost.
- *
  */
 class t_placer_statistics {
   public:
+    /// Average total cost. Cost formulation depends on the place algorithm currently being used.
     double av_cost;
+    /// Average bounding box (wiring) cost.
     double av_bb_cost;
+    /// Average timing cost (delay * criticality).
     double av_timing_cost;
+    /// Average congestion cost.
     double av_cong_cost;
+    /// Sum of squares of the total cost.
     double sum_of_squares;
+    /// Number of accepted block swaps for the current iteration.
     int success_sum;
+    /// num_accepted / total_trials for the current iteration.
     float success_rate;
+    /// Standard deviation of the total cost.
     double std_dev;
 
-  public: //Constructor
+  public: // Constructor
     t_placer_statistics() { reset(); }
 
-  public: //Mutator
+  public: // Mutator
     ///@brief Clear all data fields.
     void reset();
 

From 3becbb669cac971edf2d52a2809e68d31307fb13 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 3 Jul 2025 11:55:43 -0400
Subject: [PATCH 31/66] rename congestion_acceptance_rate_trigger to
 congestion_rlim_trigger_ratio to be more consistent with what it actually
 does

---
 vpr/src/base/SetupVPR.cpp          |  2 +-
 vpr/src/base/read_options.cpp      |  2 +-
 vpr/src/base/read_options.h        |  2 +-
 vpr/src/base/vpr_types.h           |  2 +-
 vpr/src/place/annealer.cpp         | 24 +++++++++++-------------
 vpr/src/place/net_cost_handler.cpp |  9 +++++----
 vpr/src/place/place_util.cpp       |  4 ++--
 7 files changed, 22 insertions(+), 23 deletions(-)

diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp
index d567219a035..623efca577d 100644
--- a/vpr/src/base/SetupVPR.cpp
+++ b/vpr/src/base/SetupVPR.cpp
@@ -656,7 +656,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts)
 
     PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff;
     PlacerOpts->congestion_factor = Options.place_congestion_factor;
-    PlacerOpts->congestion_acceptance_rate_trigger = Options.place_congestion_acceptance_rate_trigger;
+    PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio;
     PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold;
 
     /* Depends on PlacerOpts->place_algorithm */
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index c49d062a835..b631145c5df 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2494,7 +2494,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("0.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_timing_grp.add_argument(args.place_congestion_acceptance_rate_trigger, "--congestion_acceptance_rate_trigger")
+    place_timing_grp.add_argument(args.place_congestion_rlim_trigger_ratio, "--congestion_rlim_trigger_ratio")
         .help("To be written")
         .default_value("0.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index babfc26396c..267dd2ab8cf 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -188,7 +188,7 @@ struct t_options {
     /* Timing-driven placement options only */
     argparse::ArgValue<float> PlaceTimingTradeoff;
     argparse::ArgValue<float> place_congestion_factor;
-    argparse::ArgValue<float> place_congestion_acceptance_rate_trigger;
+    argparse::ArgValue<float> place_congestion_rlim_trigger_ratio;
     argparse::ArgValue<float> place_congestion_chan_util_threshold;
 
     argparse::ArgValue<int> RecomputeCritIter;
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 3218d23dc06..badd2174c88 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1014,7 +1014,7 @@ struct t_placer_opts {
     t_annealing_sched anneal_sched; ///<Placement option annealing schedule
     float timing_tradeoff;
     float congestion_factor;
-    float congestion_acceptance_rate_trigger;
+    float congestion_rlim_trigger_ratio;
     float congestion_chan_util_threshold;
     int place_chan_width;
     enum e_pad_loc_type pad_loc_type;
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 26787d571db..81597c13205 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -139,7 +139,7 @@ bool t_annealing_state::outer_loop_update(float success_rate,
     }
 
     // Automatically determine exit temperature.
-    auto& cluster_ctx = g_vpr_ctx.clustering();
+    const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
     float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
 
     VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED);
@@ -377,15 +377,13 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
 
     MoveOutcomeStats move_outcome_stats;
 
-    /* I'm using negative values of proposed_net_cost as a flag,
-     * so DO NOT use cost functions that can go negative. */
-    double delta_c = 0.;        //Change in cost due to this swap.
-    double bb_delta_c = 0.;     //Change in the bounding box (wiring) cost.
-    double timing_delta_c = 0.; //Change in the timing cost (delay * criticality).
-    double congestion_delta_c = 0.;
+    double delta_c = 0.;            // Change in cost due to this swap.
+    double bb_delta_c = 0.;         // Change in the bounding box (wiring) cost.
+    double timing_delta_c = 0.;     // Change in the timing cost (delay * criticality).
+    double congestion_delta_c = 0.; // Change in the congestion cost
 
-    /* Allow some fraction of moves to not be restricted by rlim,
-     * in the hopes of better escaping local minima. */
+    // Allow some fraction of moves to not be restricted by rlim,
+    // in the hopes of better escaping local minima.
     float rlim;
     if (placer_opts_.rlim_escape_fraction > 0. && rng_.frand() < placer_opts_.rlim_escape_fraction) {
         rlim = std::numeric_limits<float>::infinity();
@@ -401,19 +399,19 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
         router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage, rng_);
     }
 
-    //When manual move toggle button is active, the manual move window asks the user for input.
+    // When manual move toggle button is active, the manual move window asks the user for input.
     if (manual_move_enabled) {
 #ifndef NO_GRAPHICS
         create_move_outcome = manual_move_display_and_propose(manual_move_generator_, blocks_affected_,
                                                               proposed_action.move_type, rlim,
                                                               placer_opts_, criticalities_);
-#endif //NO_GRAPHICS
+#endif // NO_GRAPHICS
     } else if (router_block_move) {
         // generate a move where two random router blocks are swapped
         create_move_outcome = propose_router_swap(blocks_affected_, rlim, blk_loc_registry, place_macros_, rng_);
         proposed_action.move_type = e_move_type::UNIFORM;
     } else {
-        //Generate a new move (perturbation) used to explore the space of possible placements
+        // Generate a new move (perturbation) used to explore the space of possible placements
         create_move_outcome = move_generator.propose_move(blocks_affected_, proposed_action, rlim, placer_opts_, criticalities_);
     }
 
@@ -681,7 +679,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
     }
 
     if (congestion_modeling_started_
-        || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_acceptance_rate_trigger) {
+        || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_rlim_trigger_ratio) {
         costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util();
 
         if (!congestion_modeling_started_) {
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 64cb2c7287b..44355b3b8f7 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1421,7 +1421,7 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts)
     VTR_ASSERT_SAFE(congestion_modeling_started_);
     const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
 
-    const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id];
+    //    const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id];
 
     //    int distance_x = bb.xmax - bb.xmin + 1;
     //    int distance_y = bb.ymax - bb.ymin + 1;
@@ -1802,6 +1802,7 @@ double NetCostHandler::estimate_routing_chan_util() {
         }
     }
 
+    // Channel width is computed only once and reused in later calls.
     if (chanx_width_.empty()) {
         VTR_ASSERT(chany_width_.empty());
         std::tie(chanx_width_, chany_width_) = calculate_channel_width();
@@ -1811,9 +1812,9 @@ double NetCostHandler::estimate_routing_chan_util() {
     VTR_ASSERT(chanx_util_.size() == chanx_width_.size());
     VTR_ASSERT(chany_util_.size() == chany_width_.size());
 
-    for (size_t layer = 0; layer < chanx_util_.dim_size(0); ++layer) {
-        for (size_t x = 0; x < chanx_util_.dim_size(1); ++x) {
-            for (size_t y = 0; y < chanx_util_.dim_size(2); ++y) {
+    for (size_t layer = 0; layer < num_layers; ++layer) {
+        for (size_t x = 0; x < grid_width; ++x) {
+            for (size_t y = 0; y < grid_height; ++y) {
                 if (chanx_width_[layer][x][y] > 0) {
                     chanx_util_[layer][x][y] /= chanx_width_[layer][x][y];
                 } else {
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index 9b839e646b8..c23029b0e00 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -11,7 +11,7 @@
 #include "noc_place_utils.h"
 
 void t_placer_costs::update_norm_factors() {
-    const auto& clustered_nlist = g_vpr_ctx.clustering().clb_nlist;
+    const ClusteredNetlist& clustered_nlist = g_vpr_ctx.clustering().clb_nlist;
 
     bb_cost_norm = 1 / bb_cost;
 
@@ -76,7 +76,7 @@ int get_place_inner_loop_num_move(const t_placer_opts& placer_opts, const t_anne
         move_lim = int(annealing_sched.inner_num * pow(device_size, 2. / 3.) * pow(num_blocks, 2. / 3.));
     }
 
-    /* Avoid having a non-positive move_lim */
+    // Avoid having a non-positive move_lim
     move_lim = std::max(move_lim, 1);
 
     return move_lim;

From ae67ef82e09b9bcd8206438cdef41b8fad64983d Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 3 Jul 2025 12:08:16 -0400
Subject: [PATCH 32/66] update t_exit to avoid cost factor normalization when
 congestion modeling is enabled

---
 vpr/src/place/annealer.cpp   | 21 ++++++++++-----------
 vpr/src/place/annealer.h     |  4 ++--
 vpr/src/place/place_util.cpp |  2 +-
 3 files changed, 13 insertions(+), 14 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 81597c13205..40655f73cc6 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -118,6 +118,7 @@ t_annealing_state::t_annealing_state(float first_t,
 }
 
 bool t_annealing_state::outer_loop_update(float success_rate,
+                                          bool congestion_modeling_enabled,
                                           const t_placer_costs& costs,
                                           const t_placer_opts& placer_opts) {
 #ifndef NO_GRAPHICS
@@ -140,7 +141,12 @@ bool t_annealing_state::outer_loop_update(float success_rate,
 
     // Automatically determine exit temperature.
     const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
-    float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
+    float t_exit;
+    if (congestion_modeling_enabled) {
+        t_exit = 0.005 * (1. + placer_opts.congestion_factor) * costs.cost / cluster_ctx.clb_nlist.nets().size();
+    } else {
+        t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
+    }
 
     VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED);
     // Automatically adjust alpha according to success rate.
@@ -232,8 +238,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     , congestion_modeling_started_(false) {
     const auto& device_ctx = g_vpr_ctx.device();
 
-    congestion_factor_ = placer_opts_.congestion_factor;
-    placer_opts_.congestion_factor = 0.;
 
     float first_crit_exponent;
     if (placer_opts.place_algorithm.is_timing_driven()) {
@@ -474,7 +478,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator,
                            placer_opts_.timing_tradeoff,
                            timing_delta_c,
                            costs_.timing_cost_norm);
-            delta_c = (1 - placer_opts_.timing_tradeoff - placer_opts_.congestion_factor) * bb_delta_c * costs_.bb_cost_norm
+            delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm
                       + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm
                       + placer_opts_.congestion_factor * congestion_delta_c * costs_.congestion_cost_norm;
         } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) {
@@ -683,12 +687,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
         costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util();
 
         if (!congestion_modeling_started_) {
-            VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff);
-            placer_opts_.congestion_factor = congestion_factor_;
-            placer_opts_.congestion_factor /= 1.f + congestion_factor_;
-            //            placer_opts_.congestion_factor /= 1.f + placer_opts_.congestion_factor;
-            placer_opts_.timing_tradeoff /= 1.f + congestion_factor_;
-            VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff);
+            VTR_LOG("Congestion modeling started.\n");
             congestion_modeling_started_ = true;
         }
     }
@@ -803,7 +802,7 @@ const t_annealing_state& PlacementAnnealer::get_annealing_state() const {
 }
 
 bool PlacementAnnealer::outer_loop_update_state() {
-    return annealing_state_.outer_loop_update(placer_stats_.success_rate, costs_, placer_opts_);
+    return annealing_state_.outer_loop_update(placer_stats_.success_rate, congestion_modeling_started_, costs_, placer_opts_);
 }
 
 void PlacementAnnealer::start_quench() {
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index 324a633c083..c7183cce9a3 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -105,6 +105,7 @@ class t_annealing_state {
      * @return True->continues the annealing. False->exits the annealing.
      */
     bool outer_loop_update(float success_rate,
+                           bool congestion_modeling_enabled,
                            const t_placer_costs& costs,
                            const t_placer_opts& placer_opts);
 
@@ -269,8 +270,7 @@ class PlacementAnnealer {
     float estimate_starting_temperature_();
 
   private:
-    t_placer_opts placer_opts_;
-    float congestion_factor_;
+    const t_placer_opts& placer_opts_;
 
     PlacerState& placer_state_;
     const PlaceMacros& place_macros_;
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp
index c23029b0e00..e206037bb3e 100644
--- a/vpr/src/place/place_util.cpp
+++ b/vpr/src/place/place_util.cpp
@@ -42,7 +42,7 @@ double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_
         total_cost = bb_cost * bb_cost_norm;
     } else if (placer_opts.place_algorithm.is_timing_driven()) {
         // in timing mode we include both wirelength and timing costs
-        total_cost = (1 - placer_opts.timing_tradeoff - placer_opts.congestion_factor) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm);
+        total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm);
     }
 
     total_cost += placer_opts.congestion_factor * congestion_cost * congestion_cost_norm;

From 73dfa44de1e8c510560d11b8bc3779578c1050eb Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Thu, 3 Jul 2025 13:33:57 -0400
Subject: [PATCH 33/66] make format

---
 vpr/src/place/annealer.cpp | 7 ++-----
 vpr/src/place/annealer.h   | 1 -
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 40655f73cc6..845b109cbb5 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -141,11 +141,9 @@ bool t_annealing_state::outer_loop_update(float success_rate,
 
     // Automatically determine exit temperature.
     const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering();
-    float t_exit;
+    float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
     if (congestion_modeling_enabled) {
-        t_exit = 0.005 * (1. + placer_opts.congestion_factor) * costs.cost / cluster_ctx.clb_nlist.nets().size();
-    } else {
-        t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size();
+        t_exit *= (1. + placer_opts.congestion_factor);
     }
 
     VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED);
@@ -238,7 +236,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts,
     , congestion_modeling_started_(false) {
     const auto& device_ctx = g_vpr_ctx.device();
 
-
     float first_crit_exponent;
     if (placer_opts.place_algorithm.is_timing_driven()) {
         first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */
diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h
index c7183cce9a3..e406581956e 100644
--- a/vpr/src/place/annealer.h
+++ b/vpr/src/place/annealer.h
@@ -271,7 +271,6 @@ class PlacementAnnealer {
 
   private:
     const t_placer_opts& placer_opts_;
-
     PlacerState& placer_state_;
     const PlaceMacros& place_macros_;
     /// Stores different placement cost terms

From b1b52f6ef9032ac81baa9a260283f0544cc85c8b Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Mon, 7 Jul 2025 15:45:30 -0400
Subject: [PATCH 34/66] assume cube mode for routing chan util estimation in
 the routing stage

---
 vpr/src/route/route_common.cpp      | 3 +--
 vpr/src/route/route_utilization.cpp | 4 ++--
 vpr/src/route/route_utilization.h   | 4 ++--
 3 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp
index 3b0dc9344bf..0e452808d69 100644
--- a/vpr/src/route/route_common.cpp
+++ b/vpr/src/route/route_common.cpp
@@ -481,11 +481,10 @@ void reset_rr_node_route_structs(const t_router_opts& route_opts) {
     auto& route_ctx = g_vpr_ctx.mutable_routing();
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& blk_loc_registry = g_vpr_ctx.placement().blk_loc_registry();
-    const bool cube_bb = g_vpr_ctx.placement().cube_bb;
 
     VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_graph.num_nodes()));
 
-    RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry, cube_bb);
+    RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry);
     const auto [chanx_util, chany_util] = routing_chan_util_estimator.estimate_routing_chan_util();
 
     for (const RRNodeId rr_id : device_ctx.rr_graph.nodes()) {
diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp
index a5dc3b0b4fc..5127f6e8db8 100644
--- a/vpr/src/route/route_utilization.cpp
+++ b/vpr/src/route/route_utilization.cpp
@@ -5,12 +5,12 @@
 #include "vpr_utils.h"
 #include "route_common.h"
 
-RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry, bool cube_bb) {
+RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry) {
     placer_state_ = std::make_unique<PlacerState>(/*placement_is_timing_driven=*/false);
     placer_state_->mutable_blk_loc_registry() = blk_loc_registry;
 
     placer_opts_.place_algorithm = e_place_algorithm::BOUNDING_BOX_PLACE;
-    net_cost_handler_ = std::make_unique<NetCostHandler>(placer_opts_, *placer_state_, cube_bb);
+    net_cost_handler_ = std::make_unique<NetCostHandler>(placer_opts_, *placer_state_, /*cube_bb=*/true);
 }
 
 std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> RoutingChanUtilEstimator::estimate_routing_chan_util() {
diff --git a/vpr/src/route/route_utilization.h b/vpr/src/route/route_utilization.h
index 8e71e73375d..c23897efe6d 100644
--- a/vpr/src/route/route_utilization.h
+++ b/vpr/src/route/route_utilization.h
@@ -8,13 +8,13 @@
 
 /**
  * @class RoutingChanUtilEstimator
- * @brief This class computes the net bounding boxes and estimates the routing channel utilization
+ * @brief This class computes the net bounding boxes (cube mode) and estimates the routing channel utilization
  * for each CHANX/CHANY channel by smearing the estimated wirelength for each net across all channels
  * within its bounding box.
  */
 class RoutingChanUtilEstimator {
   public:
-    RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry, bool cube_bb);
+    RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry);
 
     std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> estimate_routing_chan_util();
 

From 5b94af747aeb08870d9579f473f0410d12c9260d Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 12:03:08 -0400
Subject: [PATCH 35/66] inline doxygen comments for data members of
 t_placer_opts and t_ap_opts

---
 vpr/src/analytical_place/ap_flow_enums.h |   2 +-
 vpr/src/base/vpr_types.h                 | 160 +++++++++++------------
 2 files changed, 74 insertions(+), 88 deletions(-)

diff --git a/vpr/src/analytical_place/ap_flow_enums.h b/vpr/src/analytical_place/ap_flow_enums.h
index da47927d5d1..707a842ae8d 100644
--- a/vpr/src/analytical_place/ap_flow_enums.h
+++ b/vpr/src/analytical_place/ap_flow_enums.h
@@ -27,7 +27,7 @@ enum class e_ap_analytical_solver {
  */
 enum class e_ap_partial_legalizer {
     BiPartitioning, ///< Partial Legalizer which forms minimum windows around dense regions and uses bipartitioning to spread blocks over windows.
-    FlowBased       ///> Partial Legalizer which flows blocks from overfilled bins to underfilled bins.
+    FlowBased       ///< Partial Legalizer which flows blocks from overfilled bins to underfilled bins.
 };
 
 /**
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index badd2174c88..ac9eef9d2f9 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -954,88 +954,77 @@ enum class e_move_type;
 
 /**
  * @brief Various options for the placer.
- *
- *   @param place_algorithm
- *              Controls which placement algorithm is used.
- *   @param place_quench_algorithm
- *              Controls which placement algorithm is used
- *              during placement quench.
- *   @param timing_tradeoff
- *              When in CRITICALITY_TIMING_PLACE mode, what is the
- *              tradeoff between timing and wiring costs.
- *   @param place_chan_width
- *              The channel width assumed if only one placement is performed.
- *   @param pad_loc_type
- *              Are pins FREE or fixed randomly.
- *   @param constraints_file
- *              File that specifies locations of locked down (constrained)
- *              blocks for placement. Empty string means no constraints file.
- *   @param write_initial_place_file
- *              Write the initial placement into this file. Empty string means
- *              the initial placement is not written.
- *   @param pad_loc_file
- *              File to read pad locations from if pad_loc_type is USER.
- *   @param place_freq
- *              Should the placement be skipped, done once, or done
- *              for each channel width in the binary search. (Default: ONCE)
- *   @param recompute_crit_iter
- *              How many temperature stages pass before we recompute
- *              criticalities based on the current placement and its
- *              estimated point-to-point delays.
- *   @param inner_loop_crit_divider
- *              (move_lim/inner_loop_crit_divider) determines how
- *              many inner_loop iterations pass before a recompute
- *              of criticalities is done.
- *   @param td_place_exp_first
- *              Exponent that is used in the CRITICALITY_TIMING_PLACE
- *              mode to specify the initial value of `crit_exponent`.
- *              After we map the slacks to criticalities, this value
- *              is used to `sharpen` the criticalities, making connections
- *              with worse slacks more critical.
- *   @param td_place_exp_last
- *              Value that the crit_exponent will be at the end.
- *   @param doPlacement
- *              True if placement is supposed to be done in the CAD flow.
- *              False if otherwise.
- *   @param place_constraint_expand
- *              Integer value that specifies how far to expand the floorplan
- *              region when printing out floorplan constraints based on
- *              current placement.
- *   @param place_constraint_subtile
- *              True if subtiles should be specified when printing floorplan
- *              constraints. False if not.
- *   @param place_auto_init_t_scale
- *              When the annealer is using the automatic schedule, this option
- *              scales the initial temperature selected.
  */
 struct t_placer_opts {
+    /// Controls which placement algorithm is used.
     t_place_algorithm place_algorithm;
+
+    /// Controls which placement algorithm is used during placement quench.
     t_place_algorithm place_quench_algorithm;
-    t_annealing_sched anneal_sched; ///<Placement option annealing schedule
+
+    /// Placement option annealing schedule
+    t_annealing_sched anneal_sched;
+
+    /// When in CRITICALITY_TIMING_PLACE mode, what is the tradeoff between timing and wiring costs.
     float timing_tradeoff;
+
     float congestion_factor;
     float congestion_rlim_trigger_ratio;
     float congestion_chan_util_threshold;
+
+    /// The channel width assumed if only one placement is performed.
     int place_chan_width;
+
+    /// Are pins FREE or fixed randomly.
     enum e_pad_loc_type pad_loc_type;
+
+    /// File that specifies locations of locked down (constrained) blocks for placement. Empty string means no constraints file.
     std::string constraints_file;
+
+    /// Write the initial placement into this file. Empty string means the initial placement is not written.
     std::string write_initial_place_file;
+
     std::string read_initial_place_file;
+
+    /// Should the placement be skipped, done once, or done for each channel width in the binary search. (Default: ONCE)
     enum pfreq place_freq;
+
+    /// How many temperature stages pass before we recompute criticalities
+    /// based on the current placement and its estimated point-to-point delays.
     int recompute_crit_iter;
+
+    /// (move_lim/inner_loop_recompute_divider) determines how many inner_loop iterations pass before a recompute of criticalities is done.
     int inner_loop_recompute_divider;
+
     int quench_recompute_divider;
+
+    /**
+     * Exponent that is used in the CRITICALITY_TIMING_PLACE mode to specify the initial value of `crit_exponent`.
+     * After we map the slacks to criticalities, this value is used to `sharpen` the criticalities, making
+     * connections with worse slacks more critical.
+     */
     float td_place_exp_first;
+
     int seed;
+
+    /// Value that the crit_exponent will be at the end.
     float td_place_exp_last;
+
+    /// True if placement is supposed to be done in the CAD flow. False if otherwise.
     e_stage_action doPlacement;
+
     float rlim_escape_fraction;
+
     std::string move_stats_file;
+
     int placement_saves_per_temperature;
+
     e_place_effort_scaling effort_scaling;
+
     e_timing_update_type timing_update_type;
 
     PlaceDelayModelType delay_model_type;
+
     e_reducer delay_model_reducer;
 
     float delay_offset;
@@ -1050,23 +1039,40 @@ struct t_placer_opts {
 
     std::string write_placement_delay_lookup;
     std::string read_placement_delay_lookup;
+
     vtr::vector<e_move_type, float> place_static_move_prob;
+
     bool RL_agent_placement;
     bool place_agent_multistate;
     bool place_checkpointing;
+
     int place_high_fanout_net;
+
     e_place_bounding_box_mode place_bounding_box_mode;
+
     e_agent_algorithm place_agent_algorithm;
+
     float place_agent_epsilon;
     float place_agent_gamma;
     float place_dm_rlim;
+
     e_agent_space place_agent_space;
+
     std::string place_reward_fun;
+
     float place_crit_limit;
+
+
+    /// Integer value that specifies how far to expand the floorplan region when
+    /// printing out floorplan constraints based on current placement.
     int place_constraint_expand;
+
+    /// True if subtiles should be specified when printing floorplan constraints. False if not.
     bool place_constraint_subtile;
+
     int floorplan_num_horizontal_partitions;
     int floorplan_num_vertical_partitions;
+
     bool place_quench_only;
 
     int placer_debug_block;
@@ -1082,6 +1088,7 @@ struct t_placer_opts {
 
     e_place_delta_delay_algorithm place_delta_delay_matrix_calculation_method;
 
+    /// When the annealer is using the automatic schedule, this option scales the initial temperature selected.
     float place_auto_init_t_scale;
 };
 
@@ -1091,63 +1098,42 @@ struct t_placer_opts {
 
 /**
  * @brief Various options for the Analytical Placer.
- *
- *   @param doAnalyticalPlacement
- *              True if analytical placement is supposed to be done in the CAD
- *              flow. False if otherwise.
- *   @param analytical_solver_type
- *              The type of analytical solver the Global Placer in the AP flow
- *              will use.
- *   @param partial_legalizer_type
- *              The type of partial legalizer the Global Placer in the AP flow
- *              will use.
- *   @param full_legalizer_type
- *              The type of full legalizer the AP flow will use.
- *   @param detailed_placer_type
- *              The type of detailed placter the AP flow will use.
- *   @param ap_timing_tradeoff
- *              A trade-off parameter used to decide how focused the AP flow
- *              should be on optimizing timing over wirelength.
- *   @param ap_high_fanout_threshold;
- *              The threshold to ignore nets with higher fanout than that
- *              value while constructing the solver.
- *   @param ap_partial_legalizer_target_density
- *              Vector of strings passed by the user to configure the target
- *              density of different physical tiles on the device.
- *   @param appack_max_dist_th
- *              Array of string passed by the user to configure the max candidate
- *              distance thresholds.
- *   @param num_threads
- *              The number of threads the AP flow can use.
- *   @param log_verbosity
- *              The verbosity level of log messages in the AP flow, with higher
- *              values leading to more verbose messages.
- *   @param generate_mass_report
- *              Whether to generate a mass report during global placement or not.
  */
 struct t_ap_opts {
+    /// True if analytical placement is supposed to be done in the CAD flow. False if otherwise.
     e_stage_action doAP;
 
+    /// The type of analytical solver the Global Placer in the AP flow will use.
     e_ap_analytical_solver analytical_solver_type;
 
+    /// The type of partial legalizer the Global Placer in the AP flow will use.
     e_ap_partial_legalizer partial_legalizer_type;
 
+    /// The type of full legalizer the AP flow will use.
     e_ap_full_legalizer full_legalizer_type;
 
+    /// The type of detailed placer the AP flow will use.
     e_ap_detailed_placer detailed_placer_type;
 
+    /// A trade-off parameter used to decide how focused the AP flow should be on optimizing timing over wirelength.
     float ap_timing_tradeoff;
 
+    /// The threshold to ignore nets with higher fanout than that value while constructing the solver.
     int ap_high_fanout_threshold;
 
+    /// Vector of strings passed by the user to configure the target density of different physical tiles on the device.
     std::vector<std::string> ap_partial_legalizer_target_density;
 
+    /// Array of strings passed by the user to configure the max candidate distance thresholds.
     std::vector<std::string> appack_max_dist_th;
 
+    /// The number of threads the AP flow can use.
     unsigned num_threads;
 
+    /// The verbosity level of log messages in the AP flow, with higher values leading to more verbose messages.
     int log_verbosity;
 
+    /// Whether to generate a mass report during global placement or not.
     bool generate_mass_report;
 };
 

From 2703a644ac7df3b520a1be6ead03d771e8ecc0b4 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 12:10:18 -0400
Subject: [PATCH 36/66] fix valgrind issue (using uninitialized variable)

---
 vpr/src/place/net_cost_handler.cpp  | 12 +++++++-----
 vpr/src/place/net_cost_handler.h    |  2 +-
 vpr/src/route/route_utilization.cpp |  2 +-
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 44355b3b8f7..c2356c2de8a 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1725,7 +1725,7 @@ double NetCostHandler::get_total_wirelength_estimate() const {
     return estimated_wirelength;
 }
 
-double NetCostHandler::estimate_routing_chan_util() {
+double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost/* = true*/) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const DeviceContext& device_ctx = g_vpr_ctx.device();
 
@@ -1850,10 +1850,12 @@ double NetCostHandler::estimate_routing_chan_util() {
 
     double cong_cost = 0.;
     // Compute congestion cost using recomputed bounding boxes and channel utilization map
-    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
-        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
-            net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false);
-            cong_cost += net_cong_cost_[net_id];
+    if (compute_congestion_cost) {
+        for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+            if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+                net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false);
+                cong_cost += net_cong_cost_[net_id];
+            }
         }
     }
 
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 70e6cad3eb9..d107e8be4df 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -143,7 +143,7 @@ class NetCostHandler {
      *
      * @return Total congestion cost.
      */
-    double estimate_routing_chan_util();
+    double estimate_routing_chan_util(bool compute_congestion_cost = true);
 
     std::pair<const vtr::NdMatrix<double, 3>&, const vtr::NdMatrix<double, 3>&> get_chanxy_util() const;
 
diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp
index 5127f6e8db8..990559269d1 100644
--- a/vpr/src/route/route_utilization.cpp
+++ b/vpr/src/route/route_utilization.cpp
@@ -24,7 +24,7 @@ std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> RoutingChanUtilEst
         net_cost_handler_->comp_bb_cong_cost(e_cost_methods::NORMAL);
 
         // Estimate routing channel utilization using
-        net_cost_handler_->estimate_routing_chan_util();
+        net_cost_handler_->estimate_routing_chan_util(/*compute_congestion_cost=*/false);
 
         return net_cost_handler_->get_chanxy_util();
     } else {

From 48f725cad17d261e0a620e1ae48039f34fafd218 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 15:14:31 -0400
Subject: [PATCH 37/66] make format

---
 vpr/src/base/vpr_types.h           | 1 -
 vpr/src/place/net_cost_handler.cpp | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index eb10ea3ad2c..a92515703b2 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1062,7 +1062,6 @@ struct t_placer_opts {
 
     float place_crit_limit;
 
-
     /// Integer value that specifies how far to expand the floorplan region when
     /// printing out floorplan constraints based on current placement.
     int place_constraint_expand;
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 14445be60ef..8e78340da9c 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1723,7 +1723,7 @@ double NetCostHandler::get_total_wirelength_estimate() const {
     return estimated_wirelength;
 }
 
-double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost/* = true*/) {
+double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /* = true*/) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const DeviceContext& device_ctx = g_vpr_ctx.device();
 

From 21c3318822137de806f62ff1c451d95e01ebffe7 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 15:36:01 -0400
Subject: [PATCH 38/66] doxygen comments for some members of t_router_opts

---
 vpr/src/base/vpr_types.h | 86 ++++++++++++++++------------------------
 1 file changed, 35 insertions(+), 51 deletions(-)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index a92515703b2..82f6ec0b390 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -1140,56 +1140,6 @@ struct t_ap_opts {
  * Router data types
  *******************************************************************/
 
-/* All the parameters controlling the router's operation are in this        *
- * structure.                                                               *
- * first_iter_pres_fac:  Present sharing penalty factor used for the        *
- *                 very first (congestion mapping) Pathfinder iteration.    *
- * initial_pres_fac:  Initial present sharing penalty factor for            *
- *                    Pathfinder; used to set pres_fac on 2nd iteration.    *
- * pres_fac_mult:  Amount by which pres_fac is multiplied each              *
- *                 routing iteration.                                       *
- * acc_fac:  Historical congestion cost multiplier.  Used unchanged         *
- *           for all iterations.                                            *
- * bend_cost:  Cost of a bend (usually non-zero only for global routing).   *
- * max_router_iterations:  Maximum number of iterations before giving       *
- *                up.                                                       *
- * min_incremental_reroute_fanout: Minimum fanout a net needs to have       *
- *              for incremental reroute to be applied to it through route   *
- *              tree pruning. Larger circuits should get larger thresholds  *
- * bb_factor:  Linear distance a route can go outside the net bounding      *
- *             box.                                                         *
- * route_type:  GLOBAL or DETAILED.                                         *
- * fixed_channel_width:  Only attempt to route the design once, with the    *
- *                       channel width given.  If this variable is          *
- *                       == NO_FIXED_CHANNEL_WIDTH, do a binary search      *
- *                       on channel width.                                  *
- * router_algorithm:  TIMING_DRIVEN or PARALLEL.  Selects the desired       *
- * routing algorithm.                                                       *
- * base_cost_type: Specifies how to compute the base cost of each type of   *
- *                 rr_node.  DELAY_NORMALIZED -> base_cost = "demand"       *
- *                 x average delay to route past 1 CLB.  DEMAND_ONLY ->     *
- *                 expected demand of this node (old breadth-first costs).  *
- *                                                                          *
- * The following parameters are used only by the timing-driven router.      *
- *                                                                          *
- * astar_fac:  Factor (alpha) used to weight expected future costs to       *
- *             target in the timing_driven router.  astar_fac = 0 leads to  *
- *             an essentially breadth-first search, astar_fac = 1 is near   *
- *             the usual astar algorithm and astar_fac > 1 are more         *
- *             aggressive.                                                  *
- * astar_offset: Offset that is subtracted from the lookahead (expected     *
- *               future costs) in the timing-driven router.                 *
- * max_criticality: The maximum criticality factor (from 0 to 1) any sink   *
- *                  will ever have (i.e. clip criticality to this number).  *
- * criticality_exp: Set criticality to (path_length(sink) / longest_path) ^ *
- *                  criticality_exp (then clip to max_criticality).         *
- * doRouting: true if routing is supposed to be done, false otherwise       *
- * routing_failure_predictor: sets the configuration to be used by the      *
- * routing failure predictor, how aggressive the threshold used to judge    *
- * and abort routings deemed unroutable                                     *
- * write_rr_graph_name: stores the file name of the output rr graph         *
- * read_rr_graph_name:  stores the file name of the rr graph to be read by vpr */
-
 enum e_router_algorithm {
     NESTED,
     PARALLEL,
@@ -1249,25 +1199,54 @@ enum class e_incr_reroute_delay_ripup {
 
 constexpr int NO_FIXED_CHANNEL_WIDTH = -1;
 
+/**
+ * @brief Parameters controlling the router's operation.
+ */
 struct t_router_opts {
     bool read_rr_edge_metadata = false;
     bool do_check_rr_graph = true;
+
+    /// Present sharing penalty factor used for the very first (congestion mapping) Pathfinder iteration.
     float first_iter_pres_fac;
+    /// Initial present sharing penalty factor for Pathfinder; used to set pres_fac on 2nd iteration.
     float initial_pres_fac;
+    /// Amount by which pres_fac is multiplied each routing iteration.
     float pres_fac_mult;
     float max_pres_fac;
+
+    /// Historical congestion cost multiplier. Used unchanged for all iterations.
     float acc_fac;
+    /// Cost of a bend (usually non-zero only for global routing).
     float bend_cost;
+    /// Maximum number of iterations before giving up.
     int max_router_iterations;
+    /// Minimum fanout a net needs to have for incremental reroute to be applied to it through route tree pruning.
+    /// Larger circuits should get larger thresholds
     int min_incremental_reroute_fanout;
     e_incr_reroute_delay_ripup incr_reroute_delay_ripup;
+    /// Linear distance a route can go outside the net bounding box.
     int bb_factor;
+    /// GLOBAL or DETAILED.
     enum e_route_type route_type;
+    /// Only attempt to route the design once, with the channel width given.
+    /// If this variable is == NO_FIXED_CHANNEL_WIDTH, do a binary search on channel width.
     int fixed_channel_width;
-    int min_channel_width_hint; ///<Hint to binary search of what the minimum channel width is
+    /// Hint to binary search of what the minimum channel width is
+    int min_channel_width_hint;
+    /// TIMING_DRIVEN or PARALLEL.  Selects the desired routing algorithm.
     enum e_router_algorithm router_algorithm;
+
+    /// Specifies how to compute the base cost of each type of rr_node.
+    /// DELAY_NORMALIZED -> base_cost = "demand" x average delay to route past 1 CLB.
+    /// DEMAND_ONLY -> expected demand of this node (old breadth-first costs).
     enum e_base_cost_type base_cost_type;
+
+    /// Factor (alpha) used to weight expected future costs to target in the timing_driven router.
+    /// astar_fac = 0 leads to an essentially breadth-first search,
+    /// astar_fac = 1 is near the usual astar algorithm and astar_fac > 1 are more aggressive.
     float astar_fac;
+
+    /// Offset that is subtracted from the lookahead (expected future costs) in the timing-driven router.
     float astar_offset;
     float router_profiler_astar_fac;
     bool enable_parallel_connection_router;
@@ -1276,7 +1255,9 @@ struct t_router_opts {
     int multi_queue_num_threads;
     int multi_queue_num_queues;
     bool multi_queue_direct_draining;
+    /// The maximum criticality factor (from 0 to 1) any sink will ever have (i.e. clip criticality to this number).
     float max_criticality;
+    /// Set criticality to (path_length(sink) / longest_path) ^ criticality_exp (then clip to max_criticality).
     float criticality_exp;
     float init_wirelength_abort_threshold;
     bool verify_binary_search;
@@ -1284,7 +1265,10 @@ struct t_router_opts {
     bool congestion_analysis;
     bool fanout_analysis;
     bool switch_usage_analysis;
+    /// true if routing is supposed to be done, false otherwise
     e_stage_action doRouting;
+    /// Sets the configuration to be used by the routing failure predictor,
+    /// i.e. how aggressive the threshold is that is used to judge and abort routings deemed unroutable.
     enum e_routing_failure_predictor routing_failure_predictor;
     enum e_routing_budgets_algorithm routing_budgets_algorithm;
     bool save_routing_per_iteration;

From ce611db50e3ac44a446de5f3db84a916ba675ad8 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 18:16:11 -0400
Subject: [PATCH 39/66] comment congestion parameters in t_placer_opts

---
 vpr/src/base/vpr_types.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 82f6ec0b390..88eb287f576 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -968,8 +968,13 @@ struct t_placer_opts {
     /// When in CRITICALITY_TIMING_PLACE mode, what is the tradeoff between timing and wiring costs.
     float timing_tradeoff;
 
+    /// Weight for how much congestion affects placement cost.
+    /// Higher means congestion is more important.
     float congestion_factor;
+    /// Start using congestion cost when (current rlim / initial rlim) drops below this value.
     float congestion_rlim_trigger_ratio;
+    /// Nets with average channel usage (within their bounding box) above this threshold
+    /// are predicted to face some congestion in the routing stage.
     float congestion_chan_util_threshold;
 
     /// The channel width assumed if only one placement is performed.

From b47514bea87ecd835220990da138d87eaddcac3c Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 18:21:32 -0400
Subject: [PATCH 40/66] remove dead code and add doxygen comment for
 congestion_modeling_started

---
 vpr/src/place/net_cost_handler.cpp |  8 +-------
 vpr/src/place/net_cost_handler.h   | 17 +++++++++--------
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 8e78340da9c..b73e7840a57 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1419,17 +1419,11 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts)
     VTR_ASSERT_SAFE(congestion_modeling_started_);
     const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
 
-    //    const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id];
-
-    //    int distance_x = bb.xmax - bb.xmin + 1;
-    //    int distance_y = bb.ymax - bb.ymin + 1;
-
     const float threshold = placer_opts_.congestion_chan_util_threshold;
 
     float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold;
     float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold;
-
-    //    return (distance_x * x_chan_cong) + (distance_y * y_chan_cong);
+    
     return x_chan_cong + y_chan_cong;
 }
 
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index d107e8be4df..1970f49977c 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -148,22 +148,23 @@ class NetCostHandler {
     std::pair<const vtr::NdMatrix<double, 3>&, const vtr::NdMatrix<double, 3>&> get_chanxy_util() const;
 
   private:
+    /// Indicates whether congestion cost modeling is enabled.
     bool congestion_modeling_started_;
-    ///@brief Specifies whether the bounding box is computed using cube method or per-layer method.
+    /// Specifies whether the bounding box is computed using cube method or per-layer method.
     bool cube_bb_;
-    ///@brief Determines whether the FPGA has multiple dies (layers)
+    /// Determines whether the FPGA has multiple dies (layers)
     bool is_multi_layer_;
-    ///@brief A reference to the placer's state to be updated by this object.
+    /// A reference to the placer's state to be updated by this object.
     PlacerState& placer_state_;
-    ///@brief Contains some parameter that determine how the placement cost is computed.
+    /// Contains some parameters that determine how the placement cost is computed.
     const t_placer_opts& placer_opts_;
-    ///@brief Points to the proper method for computing the bounding box cost from scratch.
+    /// Points to the proper method for computing the bounding box cost from scratch.
     std::function<std::tuple<double, double, double>(e_cost_methods method)> comp_bb_cong_cost_functor_;
-    ///@brief Points to the proper method for updating the bounding box of a net.
+    /// Points to the proper method for updating the bounding box of a net.
     std::function<void(ClusterNetId net_id, t_physical_tile_loc pin_old_loc, t_physical_tile_loc pin_new_loc, bool is_driver)> update_bb_functor_;
-    ///@brief Points to the proper method for getting the bounding box cost of a net
+    /// Points to the proper method for getting the bounding box cost of a net
     std::function<double(ClusterNetId)> get_net_bb_cost_functor_;
-    ///@brief Points to the proper method for getting the non-updatable bounding box of a net
+    /// Points to the proper method for getting the non-updatable bounding box of a net
     std::function<void(const ClusterNetId net)> get_non_updatable_bb_functor_;
 
     /**

From 12286012f4beaa6bedd1040ee02749c4bd97ac33 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 19:13:50 -0400
Subject: [PATCH 41/66] add ChannelData struct

---
 vpr/src/place/net_cost_handler.cpp | 121 +++++++++++++++--------------
 vpr/src/place/net_cost_handler.h   |  23 +++---
 2 files changed, 74 insertions(+), 70 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index b73e7840a57..b88fbc87dad 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -128,7 +128,7 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
         get_non_updatable_bb_functor_ = std::bind(&NetCostHandler::get_non_updatable_per_layer_bb_, this, std::placeholders::_1, /*use_ts=*/true);
     }
 
-    /* This initializes the whole matrix to OPEN which is an invalid value*/
+    // This initializes the whole matrix to OPEN which is an invalid value
     ts_layer_sink_pin_count_.resize({num_nets, size_t(num_layers)}, OPEN);
     num_sink_pin_layer_.resize({num_nets, size_t(num_layers)}, OPEN);
 
@@ -140,27 +140,27 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     net_cong_cost_.resize(num_nets, -1.);
     proposed_net_cong_cost_.resize(num_nets, -1.);
 
-    /* Used to store costs for moves not yet made and to indicate when a net's
-     * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't
-     * been recomputed. */
+    // Used to store costs for moves not yet made and to indicate when a net's
+    // cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't
+    // been recomputed.
     bb_update_status_.resize(num_nets, NetUpdateState::NOT_UPDATED_YET);
 
     alloc_and_load_chan_w_factors_for_place_cost_();
 
-    chanx_util_ = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
-    chany_util_ = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
+    chan_util_.x = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
+    chan_util_.y = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
 
-    acc_chanx_util_ = vtr::PrefixSum2D<double>(grid_width,
-                                               grid_height,
-                                               [&](size_t x, size_t y) {
-                                                   return chanx_util_[0][x][y];
-                                               });
+    acc_chan_util_.x = vtr::PrefixSum2D<double>(grid_width,
+                                                grid_height,
+                                                [&](size_t x, size_t y) {
+                                                    return chan_util_.x[0][x][y];
+                                                });
 
-    acc_chany_util_ = vtr::PrefixSum2D<double>(grid_width,
-                                               grid_height,
-                                               [&](size_t x, size_t y) {
-                                                   return chany_util_[0][x][y];
-                                               });
+    acc_chan_util_.y = vtr::PrefixSum2D<double>(grid_width,
+                                                grid_height,
+                                                [&](size_t x, size_t y) {
+                                                    return chan_util_.y[0][x][y];
+                                                });
 }
 
 void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
@@ -178,25 +178,26 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
      * This returns the total number of tracks between channels 'low' and 'high',
      * including tracks in these channels.
      */
-    acc_chanx_width_ = vtr::PrefixSum1D<int>(grid_height, [&](size_t y) noexcept {
+    acc_chan_width_.x = vtr::PrefixSum1D<int>(grid_height, [&](size_t y) noexcept {
         int chan_x_width = device_ctx.chan_width.x_list[y];
 
-        /* If the number of tracks in a channel is zero, two consecutive elements take the same
-         * value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
-         * potential issue, we assume that the channel width is at least 1.
-         */
-        if (chan_x_width == 0)
+        // If the number of tracks in a channel is zero, two consecutive elements take the same
+        // value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this
+        // potential issue, we assume that the channel width is at least 1.
+        if (chan_x_width == 0) {
             return 1;
+        }
 
         return chan_x_width;
     });
 
-    acc_chany_width_ = vtr::PrefixSum1D<int>(grid_width, [&](size_t x) noexcept {
+    acc_chan_width_.y = vtr::PrefixSum1D<int>(grid_width, [&](size_t x) noexcept {
         int chan_y_width = device_ctx.chan_width.y_list[x];
 
         // to avoid a division by zero
-        if (chan_y_width == 0)
+        if (chan_y_width == 0) {
             return 1;
+        }
 
         return chan_y_width;
     });
@@ -276,8 +277,8 @@ std::tuple<double, double, double> NetCostHandler::comp_cube_bb_cong_cost_(e_cos
 
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
         if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
-            /* Small nets don't use incremental updating on their bounding boxes,
-             * so they can use a fast bounding box calculator. */
+            // Small nets don't use incremental updating on their bounding boxes,
+            // so they can use a fast bounding box calculator.
             if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) {
                 get_bb_from_scratch_(net_id, /*use_ts=*/false);
             } else {
@@ -564,8 +565,8 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts
     if (congestion_modeling_started_) {
         auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
         const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
-        x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
-        y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+        x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+        y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
     }
 }
 
@@ -877,8 +878,8 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
     if (congestion_modeling_started_) {
         auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id];
         const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
-        x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
-        y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+        x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
+        y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
     }
 }
 
@@ -1318,8 +1319,8 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) {
     if (congestion_modeling_started_) {
         auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
         const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1);
-        x_chan_util = acc_chanx_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
-        y_chan_util = acc_chany_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
+        x_chan_util = acc_chan_util_.x.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
+        y_chan_util = acc_chan_util_.y.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
     }
 }
 
@@ -1725,8 +1726,8 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
     const size_t grid_height = device_ctx.grid.height();
     const size_t num_layers = device_ctx.grid.get_num_layers();
 
-    chanx_util_.fill(0.);
-    chany_util_.fill(0.);
+    chan_util_.x.fill(0.);
+    chan_util_.y.fill(0.);
 
     // For each net, this function estimates routing channel utilization by distributing
     // the net's expected wirelength across its bounding box. The expected wirelength
@@ -1756,8 +1757,8 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
                 for (int layer = bb.layer_min; layer <= bb.layer_max; layer++) {
                     for (int x = bb.xmin; x <= bb.xmax; x++) {
                         for (int y = bb.ymin; y <= bb.ymax; y++) {
-                            chanx_util_[layer][x][y] += expected_per_x_segment_wl;
-                            chany_util_[layer][x][y] += expected_per_y_segment_wl;
+                            chan_util_.x[layer][x][y] += expected_per_x_segment_wl;
+                            chan_util_.y[layer][x][y] += expected_per_y_segment_wl;
                         }
                     }
                 }
@@ -1785,8 +1786,8 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
 
                     for (int x = bb[layer].xmin; x <= bb[layer].xmax; x++) {
                         for (int y = bb[layer].ymin; y <= bb[layer].ymax; y++) {
-                            chanx_util_[layer][x][y] += expected_per_x_segment_wl;
-                            chany_util_[layer][x][y] += expected_per_y_segment_wl;
+                            chan_util_.x[layer][x][y] += expected_per_x_segment_wl;
+                            chan_util_.y[layer][x][y] += expected_per_y_segment_wl;
                         }
                     }
                 }
@@ -1795,30 +1796,30 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
     }
 
     // Channel width is computed only once and reused in later calls.
-    if (chanx_width_.empty()) {
-        VTR_ASSERT(chany_width_.empty());
-        std::tie(chanx_width_, chany_width_) = calculate_channel_width();
+    if (chan_width_.x.empty()) {
+        VTR_ASSERT(chan_width_.y.empty());
+        std::tie(chan_width_.x, chan_width_.y) = calculate_channel_width();
     }
 
-    VTR_ASSERT(chanx_util_.size() == chany_util_.size());
-    VTR_ASSERT(chanx_util_.size() == chanx_width_.size());
-    VTR_ASSERT(chany_util_.size() == chany_width_.size());
+    VTR_ASSERT(chan_util_.x.size() == chan_util_.y.size());
+    VTR_ASSERT(chan_util_.x.size() == chan_width_.x.size());
+    VTR_ASSERT(chan_util_.y.size() == chan_width_.y.size());
 
     for (size_t layer = 0; layer < num_layers; ++layer) {
         for (size_t x = 0; x < grid_width; ++x) {
             for (size_t y = 0; y < grid_height; ++y) {
-                if (chanx_width_[layer][x][y] > 0) {
-                    chanx_util_[layer][x][y] /= chanx_width_[layer][x][y];
+                if (chan_width_.x[layer][x][y] > 0) {
+                    chan_util_.x[layer][x][y] /= chan_width_.x[layer][x][y];
                 } else {
-                    VTR_ASSERT_SAFE(chanx_width_[layer][x][y] == 0);
-                    chanx_util_[layer][x][y] = 1.;
+                    VTR_ASSERT_SAFE(chan_width_.x[layer][x][y] == 0);
+                    chan_util_.x[layer][x][y] = 1.;
                 }
 
-                if (chany_width_[layer][x][y] > 0) {
-                    chany_util_[layer][x][y] /= chany_width_[layer][x][y];
+                if (chan_width_.y[layer][x][y] > 0) {
+                    chan_util_.y[layer][x][y] /= chan_width_.y[layer][x][y];
                 } else {
-                    VTR_ASSERT_SAFE(chany_width_[layer][x][y] == 0);
-                    chany_util_[layer][x][y] = 1.;
+                    VTR_ASSERT_SAFE(chan_width_.y[layer][x][y] == 0);
+                    chan_util_.y[layer][x][y] = 1.;
                 }
             }
         }
@@ -1826,16 +1827,16 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
 
     // For now, congestion modeling in the placement stage is limited to a single die
     // TODO: extend it to multiple dice
-    acc_chanx_util_ = vtr::PrefixSum2D<double>(grid_width,
-                                               grid_height,
-                                               [&](size_t x, size_t y) {
-                                                   return chanx_util_[0][x][y];
-                                               });
+    acc_chan_util_.x = vtr::PrefixSum2D<double>(grid_width,
+                                                grid_height,
+                                                [&](size_t x, size_t y) {
+                                                    return chan_util_.x[0][x][y];
+                                                });
 
-    acc_chany_util_ = vtr::PrefixSum2D<double>(grid_width,
+    acc_chan_util_.y = vtr::PrefixSum2D<double>(grid_width,
                                                grid_height,
                                                [&](size_t x, size_t y) {
-                                                   return chany_util_[0][x][y];
+                                                   return chan_util_.y[0][x][y];
                                                });
 
     congestion_modeling_started_ = true;
@@ -1855,7 +1856,7 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
 }
 
 std::pair<const vtr::NdMatrix<double, 3>&, const vtr::NdMatrix<double, 3>&> NetCostHandler::get_chanxy_util() const {
-    return {chanx_util_, chany_util_};
+    return {chan_util_.x, chan_util_.y};
 }
 
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 1970f49977c..05fa6ba1ace 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -35,6 +35,13 @@ enum class e_cost_methods {
     CHECK
 };
 
+template<typename T>
+struct ChannelData {
+    T x;
+    T y;
+    // TODO: add Z dimension
+};
+
 class NetCostHandler {
   public:
     NetCostHandler() = delete;
@@ -259,17 +266,13 @@ class NetCostHandler {
      * number of tracks in that direction; for other cost functions they
      * will never be used.
      */
-    vtr::PrefixSum1D<int> acc_chanx_width_; // [0..device_ctx.grid.width()-1]
-    vtr::PrefixSum1D<int> acc_chany_width_; // [0..device_ctx.grid.height()-1]
+     ChannelData<vtr::PrefixSum1D<int>> acc_chan_width_;
 
-    vtr::PrefixSum2D<double> acc_chanx_util_;
-    vtr::PrefixSum2D<double> acc_chany_util_;
+     ChannelData<vtr::PrefixSum2D<double>> acc_chan_util_;
 
-    vtr::NdMatrix<double, 3> chanx_util_;
-    vtr::NdMatrix<double, 3> chany_util_;
+     ChannelData<vtr::NdMatrix<double, 3>> chan_util_;
 
-    vtr::NdMatrix<int, 3> chanx_width_;
-    vtr::NdMatrix<int, 3> chany_width_;
+     ChannelData<vtr::NdMatrix<int, 3>> chan_width_;
 
     /**
      * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in 
@@ -585,10 +588,10 @@ class NetCostHandler {
      */
     template<typename BBT>
     std::pair<double, double> get_chanxy_cost_fac_(const BBT& bb) {
-        const int total_chanx_width = acc_chanx_width_.get_sum(bb.ymin, bb.ymax);
+        const int total_chanx_width = acc_chan_width_.x.get_sum(bb.ymin, bb.ymax);
         const double inverse_average_chanx_width = (bb.ymax - bb.ymin + 1.0) / total_chanx_width;
 
-        const int total_chany_width = acc_chany_width_.get_sum(bb.xmin, bb.xmax);
+        const int total_chany_width = acc_chan_width_.y.get_sum(bb.xmin, bb.xmax);
         const double inverse_average_chany_width = (bb.xmax - bb.xmin + 1.0) / total_chany_width;
 
         return {inverse_average_chanx_width, inverse_average_chany_width};

From 04a84cde8d64e4dbeb21a60b46500bcb24e1e823 Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 19:20:53 -0400
Subject: [PATCH 42/66] return ChannelData from get_chan_util()

---
 vpr/src/place/net_cost_handler.cpp  |  4 ++--
 vpr/src/place/net_cost_handler.h    |  2 +-
 vpr/src/route/route_common.cpp      | 17 +++++++----------
 vpr/src/route/route_utilization.cpp | 24 +++++++++++++-----------
 vpr/src/route/route_utilization.h   |  2 +-
 5 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index b88fbc87dad..ab95a14b7d3 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1855,8 +1855,8 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
     return cong_cost;
 }
 
-std::pair<const vtr::NdMatrix<double, 3>&, const vtr::NdMatrix<double, 3>&> NetCostHandler::get_chanxy_util() const {
-    return {chan_util_.x, chan_util_.y};
+const ChannelData<vtr::NdMatrix<double, 3>>& NetCostHandler::get_chan_util() const {
+    return chan_util_;
 }
 
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 05fa6ba1ace..2bb599c19e2 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -152,7 +152,7 @@ class NetCostHandler {
      */
     double estimate_routing_chan_util(bool compute_congestion_cost = true);
 
-    std::pair<const vtr::NdMatrix<double, 3>&, const vtr::NdMatrix<double, 3>&> get_chanxy_util() const;
+    const ChannelData<vtr::NdMatrix<double, 3>>& get_chan_util() const;
 
   private:
     /// Indicates whether congestion cost modeling is enabled.
diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp
index db8be67b405..521ae9d40e9 100644
--- a/vpr/src/route/route_common.cpp
+++ b/vpr/src/route/route_common.cpp
@@ -80,15 +80,13 @@ static bool classes_in_same_block(ParentBlockId blk_id, int first_class_ptc_num,
  * @param route_opts Contains channel utilization threshold and weighting factor
  *                   used to increase initial 'acc_cost' for nodes going through
  *                   congested channels.
- * @param chanx_util Post-placement estimate of CHANX routing utilization per (layer, x, y) location.
- * @param chany_util Post-placement estimate of CHANY routing utilization per (layer, x, y) location.
+ * @param chan_util Post-placement estimate of routing channel utilization per (layer, x, y) location.
  * @return Initial `acc_cost` for the given RR node.
  */
 
 static float comp_initial_acc_cost(RRNodeId node_id,
                                    const t_router_opts& route_opts,
-                                   const vtr::NdMatrix<double, 3>& chanx_util,
-                                   const vtr::NdMatrix<double, 3>& chany_util);
+                                   const ChannelData<vtr::NdMatrix<double, 3>>& chan_util);
 
 /************************** Subroutine definitions ***************************/
 
@@ -436,8 +434,7 @@ void alloc_and_load_rr_node_route_structs(const t_router_opts& router_opts) {
 
 static float comp_initial_acc_cost(RRNodeId node_id,
                                    const t_router_opts& route_opts,
-                                   const vtr::NdMatrix<double, 3>& chanx_util,
-                                   const vtr::NdMatrix<double, 3>& chany_util) {
+                                   const ChannelData<vtr::NdMatrix<double, 3>>& chan_util) {
     const auto& rr_graph = g_vpr_ctx.device().rr_graph;
 
     // The default acc_cost is 1 for all rr_nodes. For routing wires, if they pass through a channel
@@ -457,7 +454,7 @@ static float comp_initial_acc_cost(RRNodeId node_id,
             int y = rr_graph.node_ylow(node_id);
             int layer = rr_graph.node_layer(node_id);
             for (int x = rr_graph.node_xlow(node_id); x <= rr_graph.node_xhigh(node_id); x++) {
-                max_util = std::max(max_util, chanx_util[layer][x][y]);
+                max_util = std::max(max_util, chan_util.x[layer][x][y]);
             }
 
         } else {
@@ -465,7 +462,7 @@ static float comp_initial_acc_cost(RRNodeId node_id,
             int x = rr_graph.node_xlow(node_id);
             int layer = rr_graph.node_layer(node_id);
             for (int y = rr_graph.node_ylow(node_id); y <= rr_graph.node_yhigh(node_id); y++) {
-                max_util = std::max(max_util, chany_util[layer][x][y]);
+                max_util = std::max(max_util, chan_util.y[layer][x][y]);
             }
         }
 
@@ -485,13 +482,13 @@ void reset_rr_node_route_structs(const t_router_opts& route_opts) {
     VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_graph.num_nodes()));
 
     RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry);
-    const auto [chanx_util, chany_util] = routing_chan_util_estimator.estimate_routing_chan_util();
+    const ChannelData<vtr::NdMatrix<double, 3>> chan_util = routing_chan_util_estimator.estimate_routing_chan_util();
 
     for (const RRNodeId rr_id : device_ctx.rr_graph.nodes()) {
         t_rr_node_route_inf& node_inf = route_ctx.rr_node_route_inf[rr_id];
 
         node_inf.prev_edge = RREdgeId::INVALID();
-        node_inf.acc_cost = comp_initial_acc_cost(rr_id, route_opts, chanx_util, chany_util);
+        node_inf.acc_cost = comp_initial_acc_cost(rr_id, route_opts, chan_util);
         node_inf.path_cost = std::numeric_limits<float>::infinity();
         node_inf.backward_path_cost = std::numeric_limits<float>::infinity();
         node_inf.set_occ(0);
diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp
index 990559269d1..531382352d8 100644
--- a/vpr/src/route/route_utilization.cpp
+++ b/vpr/src/route/route_utilization.cpp
@@ -13,7 +13,7 @@ RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc
     net_cost_handler_ = std::make_unique<NetCostHandler>(placer_opts_, *placer_state_, /*cube_bb=*/true);
 }
 
-std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> RoutingChanUtilEstimator::estimate_routing_chan_util() {
+ChannelData<vtr::NdMatrix<double, 3>> RoutingChanUtilEstimator::estimate_routing_chan_util() {
     const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist;
     const auto& block_locs = placer_state_->block_locs();
 
@@ -26,21 +26,23 @@ std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> RoutingChanUtilEst
         // Estimate routing channel utilization using
         net_cost_handler_->estimate_routing_chan_util(/*compute_congestion_cost=*/false);
 
-        return net_cost_handler_->get_chanxy_util();
+        return net_cost_handler_->get_chan_util();
     } else {
         const auto& device_ctx = g_vpr_ctx.device();
 
-        auto chanx_util = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(),
-                                                     device_ctx.grid.width(),
-                                                     device_ctx.grid.height()}},
-                                                   0);
+        ChannelData<vtr::NdMatrix<double, 3>> chan_util;
 
-        auto chany_util = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(),
-                                                     device_ctx.grid.width(),
-                                                     device_ctx.grid.height()}},
-                                                   0);
+        chan_util.x = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(),
+                                                 device_ctx.grid.width(),
+                                                 device_ctx.grid.height()}},
+                                               0);
 
-        return {chanx_util, chany_util};
+        chan_util.y = vtr::NdMatrix<double, 3>({{(size_t)device_ctx.grid.get_num_layers(),
+                                                 device_ctx.grid.width(),
+                                                 device_ctx.grid.height()}},
+                                               0);
+
+        return chan_util;
     }
 }
 
diff --git a/vpr/src/route/route_utilization.h b/vpr/src/route/route_utilization.h
index c23897efe6d..5a89247eaf1 100644
--- a/vpr/src/route/route_utilization.h
+++ b/vpr/src/route/route_utilization.h
@@ -16,7 +16,7 @@ class RoutingChanUtilEstimator {
   public:
     RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry);
 
-    std::pair<vtr::NdMatrix<double, 3>, vtr::NdMatrix<double, 3>> estimate_routing_chan_util();
+    ChannelData<vtr::NdMatrix<double, 3>> estimate_routing_chan_util();
 
   private:
     std::unique_ptr<PlacerState> placer_state_;

From 73332ab7a05b0648f63ae5aa0acae8d33f91a4cb Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 19:34:47 -0400
Subject: [PATCH 43/66] doxygen comments for congestion related matrices in
 NetCostHandler

---
 vpr/src/base/stats.h                |  4 +++-
 vpr/src/place/net_cost_handler.cpp  |  2 +-
 vpr/src/place/net_cost_handler.h    | 24 +++++++++++++++++++++---
 vpr/src/route/route_utilization.cpp |  2 +-
 4 files changed, 26 insertions(+), 6 deletions(-)

diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h
index 0f73aa13362..1d4f118721a 100644
--- a/vpr/src/base/stats.h
+++ b/vpr/src/base/stats.h
@@ -28,7 +28,7 @@ void routing_stats(const Netlist<>& net_list,
 /**
  * @brief Calculates the routing channel width at each grid location.
  *
- * Iterates through all RR nodes and counts how many wires pass through each (x, y) location
+ * Iterates through all RR nodes and counts how many wires pass through each (layer, x, y) location
  * for both horizontal (CHANX) and vertical (CHANY) channels.
  *
  * @return A pair of 3D matrices:
@@ -73,6 +73,8 @@ void print_device_utilization(const float target_device_utilization);
  *   - Occupancy percentage (occupancy / capacity)
  *   - Channel capacity
  *
+ *   TODO: extend to 3D
+ *
  * @param filename      Output file path.
  * @param occupancy     Matrix of occupancy counts.
  * @param capacity_list List of channel capacities (per y for chanx, per x for chany).
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index ab95a14b7d3..7d9e0d5d0f8 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1842,7 +1842,7 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
     congestion_modeling_started_ = true;
 
     double cong_cost = 0.;
-    // Compute congestion cost using recomputed bounding boxes and channel utilization map
+    // Compute congestion cost using computed bounding boxes and channel utilization map
     if (compute_congestion_cost) {
         for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
             if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 2bb599c19e2..62218b3c448 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -143,15 +143,22 @@ class NetCostHandler {
     /**
      * @brief Estimates routing channel utilization and computes the congestion cost
      * for each net.
+     * @param compute_congestion_cost Indicates whether computing congestion cost is needed.
      *
      * For each net, distributes estimated wirelength across its bounding box
      * and accumulates demand for different routing channels. Normalizes by channel widths
      * (e.g. a value of 0.5 means 50% of the wiring in a channel is expected to be used).
      *
-     * @return Total congestion cost.
+     * @note This method assumes that net bounding boxes are already computed.
+     *
+     * @return Total congestion cost if @p compute_congestion_cost is true; 0 otherwise.
      */
     double estimate_routing_chan_util(bool compute_congestion_cost = true);
 
+    /**
+     * @brief Returns the estimated routing channel usage for each location in the grid.
+     *        The channel usage estimates are computed in estimate_routing_chan_util().
+     */
     const ChannelData<vtr::NdMatrix<double, 3>>& get_chan_util() const;
 
   private:
@@ -268,10 +275,21 @@ class NetCostHandler {
      */
      ChannelData<vtr::PrefixSum1D<int>> acc_chan_width_;
 
-     ChannelData<vtr::PrefixSum2D<double>> acc_chan_util_;
-
+     /**
+      * @brief Estimated routing usage per channel segment, indexed by [layer][x][y].
+      *        Values are the wire demand of all nets distributed over their bounding
+      *        boxes, normalized by the channel width at that location.
+      */
      ChannelData<vtr::NdMatrix<double, 3>> chan_util_;
 
+     /**
+      * @brief Accumulated (prefix sum) channel utilization in each direction (x/y),
+      *        on the base layer. Enables fast computation of average utilization
+      *        over a net's bounding box during congestion cost estimation.
+      */
+     ChannelData<vtr::PrefixSum2D<double>> acc_chan_util_;
+
+     /// Available channel width per grid location, indexed by [layer][x][y].
      ChannelData<vtr::NdMatrix<int, 3>> chan_width_;
 
     /**
diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp
index 531382352d8..2cc40349183 100644
--- a/vpr/src/route/route_utilization.cpp
+++ b/vpr/src/route/route_utilization.cpp
@@ -23,7 +23,7 @@ ChannelData<vtr::NdMatrix<double, 3>> RoutingChanUtilEstimator::estimate_routing
         // Compute net bounding boxes
         net_cost_handler_->comp_bb_cong_cost(e_cost_methods::NORMAL);
 
-        // Estimate routing channel utilization using
+        // Estimate routing channel usage
         net_cost_handler_->estimate_routing_chan_util(/*compute_congestion_cost=*/false);
 
         return net_cost_handler_->get_chan_util();

From 98f30449dc5c830ed85e22648a0239a8115e5fda Mon Sep 17 00:00:00 2001
From: soheilshahrouz <soheilqs@gmail.com>
Date: Tue, 8 Jul 2025 19:55:21 -0400
Subject: [PATCH 44/66] rename *chann* to *chan* in NetCostHandler

---
 vpr/src/place/net_cost_handler.cpp | 14 +++++++-------
 vpr/src/place/net_cost_handler.h   |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 7d9e0d5d0f8..273c287accb 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -105,10 +105,10 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     if (cube_bb_) {
         ts_bb_edge_new_.resize(num_nets, t_bb());
         ts_bb_coord_new_.resize(num_nets, t_bb());
-        ts_avg_chann_util_new_.resize(num_nets, {0., 0.});
+        ts_avg_chan_util_new_.resize(num_nets, {0., 0.});
 
         bb_coords_.resize(num_nets, t_bb());
-        avg_chann_util_.resize(num_nets, {0., 0.});
+        avg_chan_util_.resize(num_nets, {0., 0.});
 
         bb_num_on_edges_.resize(num_nets, t_bb());
         comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1);
@@ -563,7 +563,7 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts
     }
 
     if (congestion_modeling_started_) {
-        auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
+        auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id];
         const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
         x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
         y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
@@ -876,7 +876,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
     }
 
     if (congestion_modeling_started_) {
-        auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id];
+        auto& [x_chan_util, y_chan_util] = ts_avg_chan_util_new_[net_id];
         const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
         x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
         y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels;
@@ -1317,7 +1317,7 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) {
     num_on_edges.layer_max = layer_max_edge;
 
     if (congestion_modeling_started_) {
-        auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
+        auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id];
         const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1);
         x_chan_util = acc_chan_util_.x.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
         y_chan_util = acc_chan_util_.y.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels;
@@ -1418,7 +1418,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) {
 
 double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) {
     VTR_ASSERT_SAFE(congestion_modeling_started_);
-    const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id];
+    const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id];
 
     const float threshold = placer_opts_.congestion_chan_util_threshold;
 
@@ -1862,7 +1862,7 @@ const ChannelData<vtr::NdMatrix<double, 3>>& NetCostHandler::get_chan_util() con
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
     if (cube_bb_) {
         bb_coords_[net_id] = ts_bb_coord_new_[net_id];
-        avg_chann_util_[net_id] = ts_avg_chann_util_new_[net_id];
+        avg_chan_util_[net_id] = ts_avg_chan_util_new_[net_id];
     } else {
         layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id];
     }
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 62218b3c448..3537182c227 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -214,7 +214,7 @@ class NetCostHandler {
     /* [0...num_affected_nets] -> net_id of the affected nets */
     std::vector<ClusterNetId> ts_nets_to_update_;
 
-    vtr::vector<ClusterNetId, std::pair<float, float>> ts_avg_chann_util_new_;
+    vtr::vector<ClusterNetId, std::pair<float, float>> ts_avg_chan_util_new_;
 
     /// Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     /// [0..cluster_ctx.clb_nlist.nets().size()-1]
@@ -224,7 +224,7 @@ class NetCostHandler {
     /// [0..cluster_ctx.clb_nlist.nets().size()-1]
     vtr::vector<ClusterNetId, t_bb> bb_coords_;
 
-    vtr::vector<ClusterNetId, std::pair<float, float>> avg_chann_util_;
+    vtr::vector<ClusterNetId, std::pair<float, float>> avg_chan_util_;
 
     /// Store the number of blocks on each of a net's bounding box (to allow efficient updates)
     /// [0..cluster_ctx.clb_nlist.nets().size()-1]

From 8bedcd92cabaa9f4314a395beb494cc3f9674786 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Thu, 10 Jul 2025 10:43:19 -0400
Subject: [PATCH 45/66] update comments in NetCostHandler to document return
 types

---
 vpr/src/place/net_cost_handler.h | 39 +++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 13 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 3537182c227..454bc81be65 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -62,16 +62,16 @@ class NetCostHandler {
     NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, bool cube_bb);
 
     /**
-     * @brief Finds the bb cost from scratch.
-     * Done only when the placement has been radically changed
-     * (i.e. after initial placement). Otherwise find the cost
+     * @brief Finds the bb cost and congestion cost from scratch.
+     * @details Done only when the placement has been radically changed
+     * (i.e. after initial placement). Otherwise, find the cost
      * change incrementally. If method check is NORMAL, we find
      * bounding boxes that are updatable for the larger nets.
      * If method is CHECK, all bounding boxes are found via the
      * non_updateable_bb routine, to provide a cost which can be
      * used to check the correctness of the other routine.
      * @param method The method used to calculate placement cost.
-     * @return (bounding box cost of the placement, estimated wirelength)
+     * @return (bounding box cost of the placement, estimated wirelength, congestion cost)
      *
      * @note The returned estimated wirelength is valid only when method == CHECK
      */
@@ -172,7 +172,7 @@ class NetCostHandler {
     PlacerState& placer_state_;
     /// Contains some parameter that determine how the placement cost is computed.
     const t_placer_opts& placer_opts_;
-    /// Points to the proper method for computing the bounding box cost from scratch.
+    /// Points to the proper method for computing the bounding box cost, estimated wirelength and congestion cost from scratch.
     std::function<std::tuple<double, double, double>(e_cost_methods method)> comp_bb_cong_cost_functor_;
     /// Points to the proper method for updating the bounding box of a net.
     std::function<void(ClusterNetId net_id, t_physical_tile_loc pin_old_loc, t_physical_tile_loc pin_new_loc, bool is_driver)> update_bb_functor_;
@@ -259,6 +259,15 @@ class NetCostHandler {
     vtr::vector<ClusterNetId, double> net_cost_;
     vtr::vector<ClusterNetId, double> proposed_net_cost_;
 
+    /**
+     * @brief The congestion cost for each net is based on the extent to which its
+     * average routing channel utilization exceeds a predefined threshold.
+     * This is computed by measuring the average utilization within the net's
+     * bounding box and subtracting the congestion threshold.
+     * Only the excess portion contributes to the net's congestion cost.
+     * The valid range is [0...cluster_ctx.clb_nlist.nets().size()-1] when
+     * congestion modeling is enabled. Otherwise, this vector would be empty.
+     */
     vtr::vector<ClusterNetId, double> net_cong_cost_;
     vtr::vector<ClusterNetId, double> proposed_net_cong_cost_;
 
@@ -335,7 +344,8 @@ class NetCostHandler {
     /**
      * @brief Calculates and returns the total bb (wirelength) cost change that would result from moving the blocks
      * indicated in the blocks_affected data structure.
-     * @param bb_delta_c Cost difference after and before moving the block
+     * @param bb_delta_c Bounding box cost difference after and before moving the block.
+     * @param congestion_delta_c Congestion cost difference after and before moving the block.
      */
     void set_bb_delta_cost_(double& bb_delta_c, double& congestion_delta_c);
 
@@ -352,7 +362,7 @@ class NetCostHandler {
 
     /**
      * @brief Allocates and loads acc_tile_num_inter_die_conn_ which contains the accumulative number of inter-die
-     * conntections.
+     * connections.
      *
      * @details This is only useful for multi-die FPGAs.
      */
@@ -544,17 +554,18 @@ class NetCostHandler {
      * @brief Computes the bounding box from scratch using 2D bounding boxes (per-layer mode)
      * @param method The method used to calculate placement cost. Specifies whether the cost is
      * computed from scratch or incrementally.
-     * @return (bounding box cost of the placement, estimated wirelength)
-     *
+     * @return (bounding box cost of the placement, estimated wirelength, congestion cost)
+     * @note Congestion modeling is not supported for per-layer mode, so 0 is returned.
      * @note The returned estimated wirelength is valid only when method == CHECK
      */
     std::tuple<double, double, double> comp_per_layer_bb_cost_(e_cost_methods method);
 
     /**
      * @brief Computes the bounding box from scratch using 3D bounding boxes (cube mode)
+     *        and calculates BB cost, estimated wirelength, and congestion cost (if enabled).
      * @param method The method used to calculate placement cost. Specifies whether the cost is
-     * computed from scratch or incrementally.
-     * @return (bounding box cost of the placement, estimated wirelength)
+     *               computed from scratch or incrementally.
+     * @return (bounding box cost of the placement, estimated wirelength, congestion cost)
      *
      * @note The returned estimated wirelength is valid only when method == CHECK
      */
@@ -568,8 +579,10 @@ class NetCostHandler {
 
     /**
      * @brief To mitigate round-off errors, every once in a while, the costs of nets are summed up from scratch.
-     * This functions is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again.
-     * @return Total bb (wirelength) cost for the placement
+     *        This function is called to do that for bb and congestion cost.
+     *        It doesn't calculate the BBs or channel usage estimate from scratch,
+     *        it would only add the costs again.
+     * @return (total bb cost, total congestion cost)
      */
     std::pair<double, double> recompute_bb_cong_cost_();
 

From 2b4aa2df7992902c0578e7ee57bc658c2a057efa Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Thu, 10 Jul 2025 11:00:35 -0400
Subject: [PATCH 46/66] make format

---
 vpr/src/place/net_cost_handler.cpp | 10 +++++-----
 vpr/src/place/net_cost_handler.h   | 22 +++++++++++-----------
 2 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 273c287accb..60c613783c1 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1424,7 +1424,7 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts)
 
     float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold;
     float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold;
-    
+
     return x_chan_cong + y_chan_cong;
 }
 
@@ -1834,10 +1834,10 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
                                                 });
 
     acc_chan_util_.y = vtr::PrefixSum2D<double>(grid_width,
-                                               grid_height,
-                                               [&](size_t x, size_t y) {
-                                                   return chan_util_.y[0][x][y];
-                                               });
+                                                grid_height,
+                                                [&](size_t x, size_t y) {
+                                                    return chan_util_.y[0][x][y];
+                                                });
 
     congestion_modeling_started_ = true;
 
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 454bc81be65..9d1714bfd3c 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -282,24 +282,24 @@ class NetCostHandler {
      * number of tracks in that direction; for other cost functions they
      * will never be used.
      */
-     ChannelData<vtr::PrefixSum1D<int>> acc_chan_width_;
+    ChannelData<vtr::PrefixSum1D<int>> acc_chan_width_;
 
-     /**
+    /**
      * @brief Estimated routing usage per channel segment,
      *        indexed by [layer][x][y]. Values represent normalized wire demand
      *        contribution from all nets distributed over their bounding boxes.
      */
-     ChannelData<vtr::NdMatrix<double, 3>> chan_util_;
+    ChannelData<vtr::NdMatrix<double, 3>> chan_util_;
 
-     /**
-      * @brief Accumulated (prefix sum) channel utilization in each direction (x/y),
-      *        on the base layer. Enables fast computation of average utilization
-      *        over a net’s bounding box during congestion cost estimation.
-      */
-     ChannelData<vtr::PrefixSum2D<double>> acc_chan_util_;
+    /**
+     * @brief Accumulated (prefix sum) channel utilization in each direction (x/y),
+     *        on the base layer. Enables fast computation of average utilization
+     *        over a net’s bounding box during congestion cost estimation.
+     */
+    ChannelData<vtr::PrefixSum2D<double>> acc_chan_util_;
 
-     /// Available channel width per grid location, indexed by [layer][x][y].
-     ChannelData<vtr::NdMatrix<int, 3>> chan_width_;
+    /// Available channel width per grid location, indexed by [layer][x][y].
+    ChannelData<vtr::NdMatrix<int, 3>> chan_width_;
 
     /**
      * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in 

From 4da31eeda423e807a3295c0b17d2cf4b1120f8c0 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Thu, 10 Jul 2025 11:18:30 -0400
Subject: [PATCH 47/66] lazy allocation of congestion-related data members

---
 vpr/src/place/net_cost_handler.cpp | 60 ++++++++++++++++++------------
 vpr/src/place/net_cost_handler.h   | 13 +++++--
 2 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 60c613783c1..abaf066eae3 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -94,8 +94,6 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     , placer_opts_(placer_opts) {
     const auto& device_ctx = g_vpr_ctx.device();
 
-    const size_t grid_width = device_ctx.grid.width();
-    const size_t grid_height = device_ctx.grid.height();
     const size_t num_layers = device_ctx.grid.get_num_layers();
     const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size();
 
@@ -105,10 +103,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     if (cube_bb_) {
         ts_bb_edge_new_.resize(num_nets, t_bb());
         ts_bb_coord_new_.resize(num_nets, t_bb());
-        ts_avg_chan_util_new_.resize(num_nets, {0., 0.});
 
         bb_coords_.resize(num_nets, t_bb());
-        avg_chan_util_.resize(num_nets, {0., 0.});
 
         bb_num_on_edges_.resize(num_nets, t_bb());
         comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1);
@@ -137,8 +133,6 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
     // negative net costs mean the cost is not valid.
     net_cost_.resize(num_nets, -1.);
     proposed_net_cost_.resize(num_nets, -1.);
-    net_cong_cost_.resize(num_nets, -1.);
-    proposed_net_cong_cost_.resize(num_nets, -1.);
 
     // Used to store costs for moves not yet made and to indicate when a net's
     // cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't
@@ -147,20 +141,14 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts,
 
     alloc_and_load_chan_w_factors_for_place_cost_();
 
-    chan_util_.x = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
-    chan_util_.y = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
-
-    acc_chan_util_.x = vtr::PrefixSum2D<double>(grid_width,
-                                                grid_height,
-                                                [&](size_t x, size_t y) {
-                                                    return chan_util_.x[0][x][y];
-                                                });
-
-    acc_chan_util_.y = vtr::PrefixSum2D<double>(grid_width,
-                                                grid_height,
-                                                [&](size_t x, size_t y) {
-                                                    return chan_util_.y[0][x][y];
-                                                });
+    // Congestion-related data members are not allocated until congestion modeling is enabled
+    // by calling estimate_routing_chan_util().
+    VTR_ASSERT(!congestion_modeling_started_);
+    VTR_ASSERT(chan_util_.x.empty() && chan_util_.y.empty());
+    VTR_ASSERT(acc_chan_util_.x.empty() && acc_chan_util_.y.empty());
+    VTR_ASSERT(ts_avg_chan_util_new_.empty());
+    VTR_ASSERT(avg_chan_util_.empty());
+    VTR_ASSERT(net_cong_cost_.empty() && proposed_net_cong_cost_.empty());
 }
 
 void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() {
@@ -1718,13 +1706,41 @@ double NetCostHandler::get_total_wirelength_estimate() const {
     return estimated_wirelength;
 }
 
-double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /* = true*/) {
+double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /*=true*/) {
     const auto& cluster_ctx = g_vpr_ctx.clustering();
     const DeviceContext& device_ctx = g_vpr_ctx.device();
 
     const size_t grid_width = device_ctx.grid.width();
     const size_t grid_height = device_ctx.grid.height();
     const size_t num_layers = device_ctx.grid.get_num_layers();
+    const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size();
+
+    // Congestion-related data members are allocated the first time this method is called
+    // to enable congestion modeling. This lazy allocation helps save memory when congestion
+    // modeling is not used.
+    if (!congestion_modeling_started_) {
+        congestion_modeling_started_ = true;
+
+        chan_util_.x = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
+        chan_util_.y = vtr::NdMatrix<double, 3>({{num_layers, grid_width, grid_height}}, 0);
+
+        acc_chan_util_.x = vtr::PrefixSum2D<double>(grid_width,
+                                                    grid_height,
+                                                    [&](size_t x, size_t y) {
+                                                        return chan_util_.x[0][x][y];
+                                                    });
+
+        acc_chan_util_.y = vtr::PrefixSum2D<double>(grid_width,
+                                                    grid_height,
+                                                    [&](size_t x, size_t y) {
+                                                        return chan_util_.y[0][x][y];
+                                                    });
+
+        ts_avg_chan_util_new_.resize(num_nets, {0., 0.});
+        avg_chan_util_.resize(num_nets, {0., 0.});
+        net_cong_cost_.resize(num_nets, -1.);
+        proposed_net_cong_cost_.resize(num_nets, -1.);
+    }
 
     chan_util_.x.fill(0.);
     chan_util_.y.fill(0.);
@@ -1839,8 +1855,6 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
                                                     return chan_util_.y[0][x][y];
                                                 });
 
-    congestion_modeling_started_ = true;
-
     double cong_cost = 0.;
     // Compute congestion cost using computed bounding boxes and channel utilization map
     if (compute_congestion_cost) {
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index 9d1714bfd3c..bbe1131e49b 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -553,7 +553,7 @@ class NetCostHandler {
     /**
      * @brief Computes the bounding box from scratch using 2D bounding boxes (per-layer mode)
      * @param method The method used to calculate placement cost. Specifies whether the cost is
-     * computed from scratch or incrementally.
+     *        computed from scratch or incrementally.
      * @return (bounding box cost of the placement, estimated wirelength, congestion cost)
      * @note Congestion modeling is not supported for per-layer mode, so 0 is returned.
      * @note The returned estimated wirelength is valid only when method == CHECK
@@ -588,13 +588,20 @@ class NetCostHandler {
 
     /**
      * @brief Given the 3D BB, calculate the wire-length cost of the net
-     * @param net_id ID of the net which cost is requested.
+     * @param net_id ID of the net whose cost is requested.
      * @param use_ts Specifies if the bounding box is retrieved from ts data structures
-     * or move context.
+     *               or permanent data structures.
      * @return Wirelength cost of the net
      */
     double get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts);
 
+    /**
+     * @brief Calculate the congestion cost of net using its 3D bounding box.
+     * @param net_id ID of the net whose cost is requested.
+     * @param use_ts Specifies if the bounding box is retrieved from ts data structures
+     *               or permanent data structures.
+     * @return Congestion cost of the net
+     */
     double get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts);
 
     /**

From 1f77a455ae3f07f470984deaa28fccf62f0941ad Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Thu, 10 Jul 2025 11:48:18 -0400
Subject: [PATCH 48/66] add help messages for congestion cmd options && update
 the comment and condition for enabling congestion modeling in the annealer

---
 vpr/src/base/read_options.cpp | 20 ++++++++++++--------
 vpr/src/place/annealer.cpp    | 10 ++++++++--
 2 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index b631145c5df..cae2d51651f 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -93,6 +93,7 @@ struct ParseArchFormat {
         return {"vtr", "fpga-interchange"};
     }
 };
+
 struct ParseCircuitFormat {
     ConvertedValue<e_circuit_format> from_str(const std::string& str) {
         ConvertedValue<e_circuit_format> conv_value;
@@ -2334,9 +2335,9 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
             "Specifies the type of bounding box to be used in 3D architectures.\n"
             "\n"
             "MODE options:\n"
-            "  auto_bb     : Automatically determine the appropriate bounding box based on the connections between layers.\n"
-            "  cube_bb            : Use 3D bounding boxes.\n"
-            "  per_layer_bb     : Use per-layer bounding boxes.\n"
+            "  auto_bb      : Automatically determine the appropriate bounding box based on the connections between layers.\n"
+            "  cube_bb      : Use 3D bounding boxes.\n"
+            "  per_layer_bb : Use per-layer bounding boxes.\n"
             "\n"
             "Choose one of the available modes to define the behavior of bounding boxes in your 3D architecture. The default mode is 'automatic'.")
         .default_value("auto_bb")
@@ -2490,18 +2491,21 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor")
-        .help("To be written")
+        .help("Weighting factor for congestion cost during placement. "
+              "Higher values prioritize congestion avoidance over bounding box and timing costs. "
+              "When set to zero, congestion modeling and optimization is disabled in the placement stage.")
         .default_value("0.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.place_congestion_rlim_trigger_ratio, "--congestion_rlim_trigger_ratio")
-        .help("To be written")
-        .default_value("0.0")
+        .help("Enables congestion modeling when the ratio of the current range limit to the initial range limit falls below this threshold, "
+              "provided the congestion weighting factor is non-zero.")
+        .default_value("1.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.place_congestion_chan_util_threshold, "--congestion_chan_util_threshold")
-        .help("To be written")
-        .default_value("1.0")
+        .help("Penalizes nets in placement whose average routing channel utilization within their bounding boxes exceeds this threshold.")
+        .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter")
diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index 845b109cbb5..fd07ef65bf2 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -679,8 +679,14 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
         outer_crit_iter_count_++;
     }
 
-    if (congestion_modeling_started_
-        || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_rlim_trigger_ratio) {
+
+    // Congestion modeling is enabled when the ratio of the current range limit to the initial range limit
+    // drops below a user-specified threshold, and the congestion cost weighting factor is non-zero.
+    // Once enabled, congestion modeling continues even if the range limit increases and the ratio
+    // rises above the threshold.
+    if ((annealing_state_.rlim / MoveGenerator::first_rlim < placer_opts_.congestion_rlim_trigger_ratio
+        && placer_opts_.congestion_factor != 0.)
+        || congestion_modeling_started_)  {
         costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util();
 
         if (!congestion_modeling_started_) {

From 34d801c362c3491330a33189307b91e8f8e75edb Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Thu, 10 Jul 2025 11:55:13 -0400
Subject: [PATCH 49/66] clean doxygen comments in NetCostHandler by removing
 @params that no longer exist

---
 vpr/src/place/net_cost_handler.cpp |  8 ++++----
 vpr/src/place/net_cost_handler.h   | 26 ++++++++++----------------
 2 files changed, 14 insertions(+), 20 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index abaf066eae3..1e58b18aee0 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -303,10 +303,10 @@ std::tuple<double, double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cos
     // TODO: compute congestion cost
     constexpr double cong_cost = 0.;
 
-    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */
-        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {   /* Do only if not ignored. */
-            /* Small nets don't use incremental updating on their bounding boxes, *
-             * so they can use a fast bounding box calculator.                    */
+    for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {
+        if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) {
+            // Small nets don't use incremental updating on their bounding boxes,
+            //so they can use a fast bounding box calculator.
             if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) {
                 get_layer_bb_from_scratch_(net_id,
                                            layer_bb_num_on_edges_[net_id],
diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h
index bbe1131e49b..8f9d4511e1d 100644
--- a/vpr/src/place/net_cost_handler.h
+++ b/vpr/src/place/net_cost_handler.h
@@ -115,7 +115,6 @@ class NetCostHandler {
     /**
      * @brief Update net cost data structures (in placer context and net_cost in .cpp file)
      * and reset flags (proposed_net_cost and bb_updated_before).
-     * @param num_nets_affected The number of nets affected by the move.
      * It is used to determine the index up to which elements in ts_nets_to_update are valid.
      */
     void update_move_nets();
@@ -125,7 +124,6 @@ class NetCostHandler {
      * and update "costs" accordingly. It is important to note that in this function bounding box
      * and connection delays are not calculated from scratch. However, it iterates over all nets
      * and connections and updates their costs by a complete summation, rather than incrementally.
-     * @param noc_opts Contains NoC cost weighting factors.
      * @param delay_model Placement delay model. Used to compute timing cost.
      * @param criticalities Contains the clustered netlist connection criticalities.
      * Used to computed timing cost .
@@ -305,7 +303,7 @@ class NetCostHandler {
      * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in 
      * the cross-die-layer direction over a 2D (x,y) region. We don't assume the inter-die connectivity is the same at all (x,y) locations, so we
      * can't compute the full chanz_place_cost_fac for all possible (xlow,ylow)(xhigh,yhigh) without a 4D array, which would
-     * be too big: O(n^2) in circuit size. Instead we compute a prefix sum that stores the number of inter-die connections per layer from
+     * be too big: O(n^2) in circuit size. Instead, we compute a prefix sum that stores the number of inter-die connections per layer from
      * (x=0,y=0) to (x,y). Given this, we can compute the average number of inter-die connections over a (xlow,ylow) to (xhigh,yhigh) 
      * region in O(1) (by adding and subtracting 4 entries)
      */
@@ -435,10 +433,10 @@ class NetCostHandler {
      * @details This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e. from
      * only the block location information). It updates the coordinate, number of pins on each edge information, and the
      * number of sinks on each layer. It should only be called when the bounding box information is not valid.
-     * @param net_id ID of the net which the moving pin belongs to
-     * @param coords Bounding box coordinates of the net. It is calculated in this function
-     * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function.
-     * @param num_sink_pin_layer Net's number of sinks on each layer, calculated in this function.
+     * @param net_id                ID of the net which the moving pin belongs to
+     * @param coords                Bounding box coordinates of the net. It is calculated in this function
+     * @param num_on_edges          Net's number of blocks on the edges of the bounding box. It is calculated in this function.
+     * @param layer_pin_sink_count  Net's number of sinks on each layer, calculated in this function.
      */
     void get_layer_bb_from_scratch_(ClusterNetId net_id,
                                     std::vector<t_2D_bb>& num_on_edges,
@@ -473,11 +471,9 @@ class NetCostHandler {
      * Currently assumes channels on both sides of the CLBs forming the   edges of the bounding box can be used.
      * Essentially, I am assuming the pins always lie on the outside of the bounding box. The x and y coordinates
      * are the pin's x and y coordinates. IO blocks are considered to be one cell in for simplicity.
-     * @param bb_edge_new Number of blocks on the edges of the bounding box
-     * @param bb_coord_new Coordinates of the bounding box
-     * @param num_sink_pin_layer_new Number of sinks of the given net on each layer
-     * @param pin_old_loc The old location of the moving pin
-     * @param pin_new_loc The new location of the moving pin
+     * @param net_id        Net whose bounding box is to be updated.
+     * @param pin_old_loc   The old location of the moving pin
+     * @param pin_new_loc   The new location of the moving pin
      * @param is_output_pin Is the moving pin of the type output
      */
     void update_layer_bb_(ClusterNetId net_id,
@@ -650,16 +646,14 @@ class NetCostHandler {
     /**
      * @brief Given the 3D BB, calculate the wire-length estimate of the net
      * @param net_id ID of the net which wirelength estimate is requested
-     * @param bb Bounding box of the net
      * @return Wirelength estimate of the net
      */
     double get_net_wirelength_estimate_(ClusterNetId net_id) const;
 
     /**
      * @brief Given the per-layer BB, calculate the wire-length estimate of the net on each layer
-     * and return the sum of the lengths
-     * @param bb Per-layer BB of the net
-     * @param net_layer_pin_sink_count Number of sink pins on each layer for the net
+     *        and return the sum of the lengths
+     * @param net_id Net whose wirelength is to be estimated.
      * @return Wirelength estimate of the net
      */
     double get_net_wirelength_from_layer_bb_(ClusterNetId net_id) const;

From e043a1fdca99eb5fe46490c1be5c7b1146f71c6e Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Thu, 10 Jul 2025 12:05:58 -0400
Subject: [PATCH 50/66] fix segfault by guarding access to avg_chan_util_

---
 vpr/src/place/net_cost_handler.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 1e58b18aee0..923361668ec 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -1876,7 +1876,9 @@ const ChannelData<vtr::NdMatrix<double, 3>>& NetCostHandler::get_chan_util() con
 void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) {
     if (cube_bb_) {
         bb_coords_[net_id] = ts_bb_coord_new_[net_id];
-        avg_chan_util_[net_id] = ts_avg_chan_util_new_[net_id];
+        if (congestion_modeling_started_) {
+            avg_chan_util_[net_id] = ts_avg_chan_util_new_[net_id];
+        }
     } else {
         layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id];
     }

From a4e318eb17053ea9d9db6b239108d981b1d20b4b Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Thu, 10 Jul 2025 12:14:02 -0400
Subject: [PATCH 51/66] explain the logic behind not starting congestion
 modeling early in the anneal

---
 vpr/src/place/annealer.cpp         | 10 +++++++---
 vpr/src/place/net_cost_handler.cpp |  1 +
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp
index fd07ef65bf2..ece401daa80 100644
--- a/vpr/src/place/annealer.cpp
+++ b/vpr/src/place/annealer.cpp
@@ -679,14 +679,18 @@ void PlacementAnnealer::outer_loop_update_timing_info() {
         outer_crit_iter_count_++;
     }
 
-
     // Congestion modeling is enabled when the ratio of the current range limit to the initial range limit
     // drops below a user-specified threshold, and the congestion cost weighting factor is non-zero.
     // Once enabled, congestion modeling continues even if the range limit increases and the ratio
     // rises above the threshold.
+    //
+    // This logic is motivated by the observation that enabling congestion modeling too early in the
+    // anneal increases computational overhead and introduces noise into the placement cost function,
+    // as early placements are typically highly congested and unstable. So, we delay congestion modeling
+    // until the placement is more settled and wirelength has been reasonably optimized.
     if ((annealing_state_.rlim / MoveGenerator::first_rlim < placer_opts_.congestion_rlim_trigger_ratio
-        && placer_opts_.congestion_factor != 0.)
-        || congestion_modeling_started_)  {
+         && placer_opts_.congestion_factor != 0.)
+        || congestion_modeling_started_) {
         costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util();
 
         if (!congestion_modeling_started_) {
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 923361668ec..62514aa8670 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -301,6 +301,7 @@ std::tuple<double, double, double> NetCostHandler::comp_per_layer_bb_cost_(e_cos
     double cost = 0.;
     double expected_wirelength = 0.;
     // TODO: compute congestion cost
+    // Congestion modeling is not supported for per-layer mode, so 0 is returned.
     constexpr double cong_cost = 0.;
 
     for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) {

From d2974d86941f1214f9a94224727a8ef2644af7c5 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 14 Jul 2025 13:18:53 -0400
Subject: [PATCH 52/66] add a high-level comment to explain how congestion is
 modeled

---
 vpr/src/place/net_cost_handler.cpp | 30 ++++++++++++++++++++++--------
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 62514aa8670..5dcda72b474 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -2,11 +2,11 @@
  * @file net_cost_handler.cpp
  * @brief This file contains the implementation of functions used to update placement cost when a new move is proposed/committed.
  *
- * VPR placement cost consists of three terms which represent wirelength, timing, and NoC cost.
+ * VPR placement cost consists of multiple terms which represent wirelength, timing, congestion, and NoC cost.
  *
- * To get an estimation of the wirelength of each net, the Half Perimeter Wire Length (HPWL) approach is used. In this approach,
+ * To get an estimation of the wirelength of each net, the Half Perimeter Wire Length (HPWL) metric is used. In this approach,
  * half of the perimeter of the bounding box which contains all terminals of the net is multiplied by a correction factor,
- * and the resulting number is considered as an estimation of the bounding box.
+ * and the resulting number is considered as an estimation of the wirelength needed to route this net.
  *
  * Currently, we have two types of bounding boxes: 3D bounding box (or Cube BB) and per-layer bounding box.
  * If the FPGA grid is a 2D structure, a Cube bounding box is used, which will always have the z direction equal to 1. For 3D architectures,
@@ -20,6 +20,17 @@
  * To get a delay estimation of a connection (from a source to a sink), first, dx and dy between these two points should be calculated,
  * and these two numbers are the indices to access this 2D array. By default, the placement delay model is created by iterating over the router lookahead
  * to get the minimum cost for each dx and dy.
+ *
+ * For congestion modeling, we periodically estimate routing channel usage by distributing the estimated
+ * wirelength (WL) of each net across all routing channels within its bounding box. The wirelength is divided
+ * between CHANX and CHANY in proportion to the bounding box's width and height, respectively. However, all
+ * routing channels of the same type (CHANX or CHANY) within the box receive an equal share of that net's WL.
+ *
+ * We compute a congestion cost for each net by averaging the estimated utilization over all CHANX and CHANY
+ * channels in its bounding box. These average utilizations are then compared to a user-specified threshold.
+ * If a net’s average utilization exceeds the threshold, the excess is penalized by adding a cost proportional
+ * to the amount of the exceedance.
+ *
  */
 #include "net_cost_handler.h"
 
@@ -551,6 +562,7 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts
         num_sink_pin_layer[pin_loc.layer_num]++;
     }
 
+    // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled
     if (congestion_modeling_started_) {
         auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id];
         const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
@@ -864,6 +876,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id,
         bb_update_status_[net_id] = NetUpdateState::UPDATED_ONCE;
     }
 
+    // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled
     if (congestion_modeling_started_) {
         auto& [x_chan_util, y_chan_util] = ts_avg_chan_util_new_[net_id];
         const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1);
@@ -1305,6 +1318,7 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) {
     num_on_edges.layer_min = layer_min_edge;
     num_on_edges.layer_max = layer_max_edge;
 
+    // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled
     if (congestion_modeling_started_) {
         auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id];
         const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1);
@@ -1584,12 +1598,12 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel*
 
     ts_nets_to_update_.resize(0);
 
-    /* Go through all the blocks moved. */
+    // Go through all the blocks moved.
     for (const t_pl_moved_block& moving_block : blocks_affected.moved_blocks) {
         auto& affected_pins = blocks_affected.affected_pins;
         ClusterBlockId blk_id = moving_block.block_num;
 
-        /* Go through all the pins in the moved block. */
+        // Go through all the pins in the moved block.
         for (ClusterPinId blk_pin : clb_nlist.block_pins(blk_id)) {
             bool is_src_moving = false;
             if (clb_nlist.pin_type(blk_pin) == PinType::SINK) {
@@ -1606,13 +1620,13 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel*
         }
     }
 
-    /* Now update the bounding box costs (since the net bounding     *
-     * boxes are up-to-date). The cost is only updated once per net. */
+    // Now update the bounding box costs (since the net bounding
+    // boxes are up-to-date). The cost is only updated once per net.
     set_bb_delta_cost_(bb_delta_c, congestion_delta_c);
 }
 
 void NetCostHandler::update_move_nets() {
-    /* update net cost functions and reset flags. */
+    // update net cost functions and reset flags.
     const auto& cluster_ctx = g_vpr_ctx.clustering();
 
     for (const ClusterNetId ts_net : ts_nets_to_update_) {

From f71d778e222e1cccc248de45ce7772614feef9f1 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 14 Jul 2025 13:28:11 -0400
Subject: [PATCH 53/66] add comments for non-existing channels and cube_bb
 assumption

---
 vpr/src/place/net_cost_handler.cpp  | 4 +++-
 vpr/src/route/route_common.cpp      | 2 +-
 vpr/src/route/route_utilization.cpp | 1 +
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp
index 5dcda72b474..77247f1510b 100644
--- a/vpr/src/place/net_cost_handler.cpp
+++ b/vpr/src/place/net_cost_handler.cpp
@@ -30,7 +30,6 @@
  * channels in its bounding box. These average utilizations are then compared to a user-specified threshold.
  * If a net’s average utilization exceeds the threshold, the excess is penalized by adding a cost proportional
  * to the amount of the exceedance.
- *
  */
 #include "net_cost_handler.h"
 
@@ -1836,6 +1835,9 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /
     VTR_ASSERT(chan_util_.x.size() == chan_width_.x.size());
     VTR_ASSERT(chan_util_.y.size() == chan_width_.y.size());
 
+    // Normalize channel utilizations by dividing by the corresponding channel widths.
+    // If a channel does not exist (i.e., its width is zero), we set its utilization to 1
+    // to avoid division by zero.
     for (size_t layer = 0; layer < num_layers; ++layer) {
         for (size_t x = 0; x < grid_width; ++x) {
             for (size_t y = 0; y < grid_height; ++y) {
diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp
index 521ae9d40e9..b5785cc5fc8 100644
--- a/vpr/src/route/route_common.cpp
+++ b/vpr/src/route/route_common.cpp
@@ -83,7 +83,6 @@ static bool classes_in_same_block(ParentBlockId blk_id, int first_class_ptc_num,
  * @param chan_util Post-placement estimate of routing channel utilization per (layer, x, y) location.
  * @return Initial `acc_cost` for the given RR node.
  */
-
 static float comp_initial_acc_cost(RRNodeId node_id,
                                    const t_router_opts& route_opts,
                                    const ChannelData<vtr::NdMatrix<double, 3>>& chan_util);
@@ -481,6 +480,7 @@ void reset_rr_node_route_structs(const t_router_opts& route_opts) {
 
     VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_graph.num_nodes()));
 
+    // RoutingChanUtilEstimator assumes cube bounding box
     RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry);
     const ChannelData<vtr::NdMatrix<double, 3>> chan_util = routing_chan_util_estimator.estimate_routing_chan_util();
 
diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp
index 2cc40349183..b8cb5b4c5df 100644
--- a/vpr/src/route/route_utilization.cpp
+++ b/vpr/src/route/route_utilization.cpp
@@ -10,6 +10,7 @@ RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc
     placer_state_->mutable_blk_loc_registry() = blk_loc_registry;
 
     placer_opts_.place_algorithm = e_place_algorithm::BOUNDING_BOX_PLACE;
+    // RoutingChanUtilEstimator uses cube bounding box
     net_cost_handler_ = std::make_unique<NetCostHandler>(placer_opts_, *placer_state_, /*cube_bb=*/true);
 }
 

From 94cc21948329f78c9cd2e48caff71b1cd4397a95 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 14 Jul 2025 13:41:52 -0400
Subject: [PATCH 54/66] clean setup_vpr

---
 vpr/src/base/setup_vpr.cpp | 110 ++++++++++++++++++-------------------
 vpr/src/base/setup_vpr.h   |   6 ++
 2 files changed, 59 insertions(+), 57 deletions(-)

diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp
index 6c04ae80056..b792ac4bf12 100644
--- a/vpr/src/base/setup_vpr.cpp
+++ b/vpr/src/base/setup_vpr.cpp
@@ -27,23 +27,61 @@
 #include "setup_vib_utils.h"
 
 static void setup_netlist_opts(const t_options& Options, t_netlist_opts& NetlistOpts);
+
+/**
+ * @brief Sets up the t_ap_opts structure based on users inputs and
+ *        on the architecture specified.
+ *
+ * Error checking, such as checking for conflicting params is assumed
+ * to be done beforehand
+ */
 static void setup_ap_opts(const t_options& options,
                           t_ap_opts& apOpts);
+
+/**
+ * @brief Sets up the t_packer_opts structure based on users inputs and
+ *        on the architecture specified.
+ *
+ * Error checking, such as checking for conflicting params is assumed
+ * to be done beforehand
+ */
 static void setup_packer_opts(const t_options& Options,
                               t_packer_opts* PackerOpts);
+
+/**
+ * @brief Sets up the t_placer_opts structure based on user input.
+ *
+ * Error checking, such as checking for conflicting params
+ * is assumed to be done beforehand
+ */
 static void setup_placer_opts(const t_options& Options,
                               t_placer_opts* PlacerOpts);
 static void setup_anneal_sched(const t_options& Options,
                                t_annealing_sched* AnnealSched);
 static void setup_router_opts(const t_options& Options, t_router_opts* RouterOpts);
+
+/**
+ * Go through all the NoC options supplied by the user and store them internally.
+ */
 static void setup_noc_opts(const t_options& Options,
                            t_noc_opts* NocOpts);
+
 static void setup_server_opts(const t_options& Options,
                               t_server_opts* ServerOpts);
 
+/**
+ * @brief Sets up routing structures.
+ *
+ * Since checks are already done, this just copies values across
+ */
 static void setup_routing_arch(const t_arch& Arch, t_det_routing_arch& RoutingArch);
 
 static void setup_timing(const t_options& Options, const bool TimingEnabled, t_timing_inf* Timing);
+
+/**
+ * @brief This loads up VPR's arch_switch_inf data by combining the switches
+ *        from the arch file with the special switches that VPR needs.
+ */
 static void setup_switches(const t_arch& Arch,
                            t_det_routing_arch& RoutingArch,
                            const std::vector<t_arch_switch_inf>& arch_switches);
@@ -72,22 +110,12 @@ static void add_intra_tile_switches();
 
 /**
  * Identify the pins that can directly reach class_inf
- * @param physical_tile
- * @param logical_block
- * @param class_inf
- * @param physical_class_num
  */
 static void do_reachability_analysis(t_physical_tile_type* physical_tile,
                                      t_logical_block_type* logical_block,
                                      t_class* class_inf,
                                      int physical_class_num);
 
-/**
- * @brief Sets VPR parameters and defaults.
- *
- * Does not do any error checking as this should have been done by
- * the various input checkers
- */
 void SetupVPR(const t_options* options,
               const bool timingenabled,
               const bool readArchFile,
@@ -306,7 +334,7 @@ void SetupVPR(const t_options* options,
 
     ShowSetup(*vpr_setup);
 
-    /* init global variables */
+    // init global variables
     vtr::out_file_prefix = options->out_file_prefix;
 
     {
@@ -348,7 +376,7 @@ void SetupVPR(const t_options* options,
 }
 
 static void setup_timing(const t_options& Options, const bool TimingEnabled, t_timing_inf* Timing) {
-    /* Don't do anything if they don't want timing */
+    // Don't do anything if they don't want timing
     if (!TimingEnabled) {
         Timing->timing_analysis_enabled = false;
         return;
@@ -358,10 +386,6 @@ static void setup_timing(const t_options& Options, const bool TimingEnabled, t_t
     Timing->SDCFile = Options.SDCFile;
 }
 
-/**
- * @brief This loads up VPR's arch_switch_inf data by combining the switches
- *        from the arch file with the special switches that VPR needs.
- */
 static void setup_switches(const t_arch& Arch,
                            t_det_routing_arch& RoutingArch,
                            const std::vector<t_arch_switch_inf>& arch_switches) {
@@ -372,10 +396,10 @@ static void setup_switches(const t_arch& Arch,
 
     find_ipin_cblock_switch_index(Arch, RoutingArch.wire_to_arch_ipin_switch, RoutingArch.wire_to_arch_ipin_switch_between_dice);
 
-    /* Depends on device_ctx.num_arch_switches */
+    // Depends on device_ctx.num_arch_switches
     RoutingArch.delayless_switch = num_arch_switches++;
 
-    /* Alloc the list now that we know the final num_arch_switches value */
+    // Alloc the list now that we know the final num_arch_switches value
     device_ctx.arch_switch_inf.resize(num_arch_switches);
     for (int iswitch = 0; iswitch < switches_to_copy; iswitch++) {
         device_ctx.arch_switch_inf[iswitch] = arch_switches[iswitch];
@@ -384,7 +408,7 @@ static void setup_switches(const t_arch& Arch,
         device_ctx.all_sw_inf[iswitch] = arch_switches[iswitch];
     }
 
-    /* Delayless switch for connecting sinks and sources with their pins. */
+    // Delayless switch for connecting sinks and sources with their pins.
     device_ctx.arch_switch_inf[RoutingArch.delayless_switch].set_type(SwitchType::MUX);
     device_ctx.arch_switch_inf[RoutingArch.delayless_switch].name = std::string(VPR_DELAYLESS_SWITCH_NAME);
     device_ctx.arch_switch_inf[RoutingArch.delayless_switch].R = 0.;
@@ -404,21 +428,15 @@ static void setup_switches(const t_arch& Arch,
 
     device_ctx.delayless_switch_idx = RoutingArch.delayless_switch;
 
-    //Warn about non-zero Cout values for the ipin switch, since these values have no effect.
-    //VPR do not model the R/C's of block internal routing connection.
-    //
-    //Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO)
+    // Warn about non-zero Cout values for the ipin switch, since these values have no effect.
+    // VPR does not model the R/C's of block internal routing connections.
+    // Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO)
     if (device_ctx.arch_switch_inf[RoutingArch.wire_to_arch_ipin_switch].Cout != 0.) {
         VTR_LOG_WARN("Non-zero switch output capacitance (%g) has no effect when switch '%s' is used for connection block inputs\n",
                      device_ctx.arch_switch_inf[RoutingArch.wire_to_arch_ipin_switch].Cout, Arch.ipin_cblock_switch_name[0].c_str());
     }
 }
 
-/**
- * @brief Sets up routing structures.
- *
- * Since checks are already done, this just copies values across
- */
 static void setup_routing_arch(const t_arch& Arch,
                                t_det_routing_arch& RoutingArch) {
     RoutingArch.switch_block_type = Arch.sb_type;
@@ -432,10 +450,10 @@ static void setup_routing_arch(const t_arch& Arch,
         RoutingArch.directionality = Arch.Segments[0].directionality;
     }
 
-    /* copy over the switch block information */
+    // copy over the switch block information
     RoutingArch.switchblocks = Arch.switchblocks;
 
-    /* Copy the tileable routing setting */
+    // Copy the tileable routing setting
     RoutingArch.tileable = Arch.tileable;
     RoutingArch.perimeter_cb = Arch.perimeter_cb;
     RoutingArch.shrink_boundary = Arch.shrink_boundary;
@@ -569,15 +587,8 @@ static void setup_anneal_sched(const t_options& Options,
     AnnealSched->type = Options.anneal_sched_type;
 }
 
-/**
- * @brief Sets up the t_ap_opts structure based on users inputs and
- *        on the architecture specified.
- *
- * Error checking, such as checking for conflicting params is assumed
- * to be done beforehand
- */
-void setup_ap_opts(const t_options& options,
-                   t_ap_opts& apOpts) {
+static void setup_ap_opts(const t_options& options,
+                          t_ap_opts& apOpts) {
     apOpts.analytical_solver_type = options.ap_analytical_solver.value();
     apOpts.partial_legalizer_type = options.ap_partial_legalizer.value();
     apOpts.full_legalizer_type = options.ap_full_legalizer.value();
@@ -591,13 +602,6 @@ void setup_ap_opts(const t_options& options,
     apOpts.generate_mass_report = options.ap_generate_mass_report.value();
 }
 
-/**
- * @brief Sets up the t_packer_opts structure based on users inputs and
- *        on the architecture specified.
- *
- * Error checking, such as checking for conflicting params is assumed
- * to be done beforehand
- */
 void setup_packer_opts(const t_options& Options,
                        t_packer_opts* PackerOpts) {
     PackerOpts->output_file = Options.NetFile;
@@ -639,12 +643,6 @@ static void setup_netlist_opts(const t_options& Options, t_netlist_opts& Netlist
     NetlistOpts.netlist_verbosity = Options.netlist_verbosity;
 }
 
-/**
- * @brief Sets up the s_placer_opts structure based on users input.
- *
- * Error checking, such as checking for conflicting params
- * is assumed to be done beforehand
- */
 static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpts) {
     if (Options.do_placement) {
         PlacerOpts->doPlacement = e_stage_action::DO;
@@ -677,7 +675,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
     PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio;
     PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold;
 
-    /* Depends on PlacerOpts->place_algorithm */
+    // Depends on PlacerOpts->place_algorithm
     PlacerOpts->delay_offset = Options.place_delay_offset;
     PlacerOpts->delay_ramp_delta_threshold = Options.place_delay_ramp_delta_threshold;
     PlacerOpts->delay_ramp_slope = Options.place_delay_ramp_slope;
@@ -686,7 +684,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
     PlacerOpts->delay_model_type = Options.place_delay_model;
     PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer;
 
-    PlacerOpts->place_freq = PLACE_ONCE; /* DEFAULT */
+    PlacerOpts->place_freq = PLACE_ONCE; // DEFAULT
 
     PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file;
 
@@ -778,9 +776,7 @@ static void setup_power_opts(const t_options& Options, t_power_opts* power_opts,
     }
 }
 
-/*
- * Go through all the NoC options supplied by the user and store them internally.
- */
+
 static void setup_noc_opts(const t_options& Options, t_noc_opts* NocOpts) {
     // assign the noc specific options from the command line
     NocOpts->noc = Options.noc;
diff --git a/vpr/src/base/setup_vpr.h b/vpr/src/base/setup_vpr.h
index f72bb231bd3..7364b8bb05d 100644
--- a/vpr/src/base/setup_vpr.h
+++ b/vpr/src/base/setup_vpr.h
@@ -5,6 +5,12 @@
 #include "physical_types.h"
 #include "vpr_types.h"
 
+/**
+ * @brief Sets VPR parameters and defaults.
+ *
+ * Does not do any error checking as this should have been done by
+ * the various input checkers
+ */
 void SetupVPR(const t_options* Options,
               const bool TimingEnabled,
               const bool readArchFile,

From be2cc281f110a057469fbdde003948a694ee603e Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Wed, 16 Jul 2025 11:20:45 -0400
Subject: [PATCH 55/66] make format

---
 vpr/src/base/setup_vpr.cpp | 1 -
 1 file changed, 1 deletion(-)

diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp
index b792ac4bf12..0f973168cde 100644
--- a/vpr/src/base/setup_vpr.cpp
+++ b/vpr/src/base/setup_vpr.cpp
@@ -776,7 +776,6 @@ static void setup_power_opts(const t_options& Options, t_power_opts* power_opts,
     }
 }
 
-
 static void setup_noc_opts(const t_options& Options, t_noc_opts* NocOpts) {
     // assign the noc specific options from the command line
     NocOpts->noc = Options.noc;

From 58413b7ee737dd30bd12174f6877b1b9435be08a Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 11:09:28 -0400
Subject: [PATCH 56/66] call clean_floorplanning_context_post_place() outside
 try_place()

---
 vpr/src/base/place_and_route.cpp    |  2 ++
 vpr/src/base/setup_vpr.cpp          |  2 +-
 vpr/src/base/vpr_api.cpp            | 12 +++++++-----
 vpr/src/place/place.cpp             | 26 +++++++++++---------------
 vpr/src/place/place_constraints.cpp |  2 +-
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index 138d411539d..b7145f3ef09 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -360,6 +360,8 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
         }
     }
 
+    g_vpr_ctx.mutable_floorplanning().clean_floorplanning_context_post_place();
+
     // End binary search verification.
     // Restore the best placement (if necessary), the best routing, and the
     // best channel widths for final drawing and statistics output.
diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp
index 4f2cc967bca..4148bdeac4b 100644
--- a/vpr/src/base/setup_vpr.cpp
+++ b/vpr/src/base/setup_vpr.cpp
@@ -683,7 +683,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
     PlacerOpts->delay_model_type = Options.place_delay_model;
     PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer;
 
-    PlacerOpts->place_freq = PLACE_ONCE; /* DEFAULT */
+    PlacerOpts->place_freq = PLACE_ALWAYS; /* DEFAULT */
 
     PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file;
 
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index dcd0d2394c9..7ba9580ef9d 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -726,7 +726,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) {
     auto& cluster_ctx = g_vpr_ctx.mutable_clustering();
     const AtomContext& atom_ctx = g_vpr_ctx.atom();
 
-    /* Ensure we have a clean start with void net remapping information */
+    // Ensure we have a clean start with void net remapping information
     cluster_ctx.post_routing_clb_pin_nets.clear();
     cluster_ctx.pre_routing_net_pin_mapping.clear();
 
@@ -735,7 +735,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) {
                                          vpr_setup.FileNameOpts.verify_file_digests,
                                          vpr_setup.PackerOpts.pack_verbosity);
 
-    /* Load the mapping between clusters and their atoms */
+    // Load the mapping between clusters and their atoms
     init_clb_atoms_lookup(cluster_ctx.atoms_lookup, atom_ctx, cluster_ctx.clb_nlist);
 
     process_constant_nets(g_vpr_ctx.mutable_atom().mutable_netlist(),
@@ -749,14 +749,14 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) {
         report_packing_pin_usage(ofs, g_vpr_ctx);
     }
 
-    // Ater the clustered netlist has been loaded, update the floorplanning
+    // After the clustered netlist has been loaded, update the floorplanning
     // constraints with the new information.
     g_vpr_ctx.mutable_floorplanning().update_floorplanning_context_post_pack();
 
     /* Sanity check the resulting netlist */
     check_netlist(vpr_setup.PackerOpts.pack_verbosity);
 
-    // Independently verify the clusterings to ensure the clustering can be
+    // Independently verify the clustering to ensure the clustering can be
     // used for the rest of the VPR flow.
     unsigned num_errors = verify_clustering(g_vpr_ctx);
     if (num_errors == 0) {
@@ -768,7 +768,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) {
                   num_errors);
     }
 
-    /* Output the netlist stats to console and optionally to file. */
+    // Output the netlist stats to console and optionally to file.
     writeClusteredNetlistStats(vpr_setup.FileNameOpts.write_block_usage);
 
     // print the total number of used physical blocks for each
@@ -887,6 +887,8 @@ void vpr_place(const Netlist<>& net_list,
               g_vpr_ctx.atom().flat_placement_info(),
               is_flat);
 
+    g_vpr_ctx.mutable_floorplanning().clean_floorplanning_context_post_place();
+
     auto& filename_opts = vpr_setup.FileNameOpts;
     auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& block_locs = g_vpr_ctx.placement().block_locs();
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index c5d46b5af3f..6f2ebb89e0f 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -38,10 +38,9 @@ void try_place(const Netlist<>& net_list,
                const FlatPlacementInfo& flat_placement_info,
                bool is_flat) {
 
-    /* Currently, the functions that require is_flat as their parameter and are called during placement should
-     * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as
-     * if is_flat is false, even if is_flat is set to true from the command line.
-     */
+    // Currently, the functions that require is_flat as their parameter and are called during placement should
+    // receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as
+    // if is_flat is false, even if is_flat is set to true from the command line
     VTR_ASSERT(!is_flat);
     const auto& device_ctx = g_vpr_ctx.device();
     const auto& cluster_ctx = g_vpr_ctx.clustering();
@@ -74,9 +73,9 @@ void try_place(const Netlist<>& net_list,
         normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts));
     }
 
-    /* Placement delay model is independent of the placement and can be shared across
-     * multiple placers if we are performing parallel annealing.
-     * So, it is created and initialized once. */
+    // Placement delay model is independent of the placement and can be shared across
+    // multiple placers if we are performing parallel annealing.
+    // So, it is created and initialized once.
     std::shared_ptr<PlaceDelayModel> place_delay_model;
 
     if (placer_opts.place_algorithm.is_timing_driven()) {
@@ -101,9 +100,8 @@ void try_place(const Netlist<>& net_list,
      */
     mutable_placement.lock_loc_vars();
 
-    /* Start measuring placement time. The measured execution time will be printed
-     * when this object goes out of scope at the end of this function.
-     */
+    // Start measuring placement time. The measured execution time will be printed
+    // when this object goes out of scope at the end of this function.
     vtr::ScopedStartFinishTimer placement_timer("Placement");
 
     // Enables fast look-up pb graph pins from block pin indices
@@ -117,17 +115,15 @@ void try_place(const Netlist<>& net_list,
 
     placer.place();
 
-    /* The placer object has its own copy of block locations and doesn't update
-     * the global context directly. We need to copy its internal data structures
-     * to the global placement context before it goes out of scope.
-     */
+    // The placer object has its own copy of block locations and doesn't update
+    // the global context directly. We need to copy its internal data structures
+    // to the global placement context before it goes out of scope.
     placer.update_global_state();
 
     // Clean the variables in the placement context. This will deallocate memory
     // used by variables which were allocated in the placement context and are
     // never used outside of placement.
     mutable_placement.clean_placement_context_post_place();
-    mutable_floorplanning.clean_floorplanning_context_post_place();
 }
 
 #ifdef VERBOSE
diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp
index ef867ce5b1a..fdad4813cb2 100644
--- a/vpr/src/place/place_constraints.cpp
+++ b/vpr/src/place/place_constraints.cpp
@@ -209,7 +209,7 @@ void load_cluster_constraints() {
 
     floorplanning_ctx.cluster_constraints.resize(cluster_ctx.clb_nlist.blocks().size());
 
-    for (auto cluster_id : cluster_ctx.clb_nlist.blocks()) {
+    for (ClusterBlockId cluster_id : cluster_ctx.clb_nlist.blocks()) {
         const std::unordered_set<AtomBlockId>& atoms = cluster_ctx.atoms_lookup[cluster_id];
         PartitionRegion empty_pr;
         floorplanning_ctx.cluster_constraints[cluster_id] = empty_pr;

From d8ebd1818c84eb148a14c0ada7cac3251cb4a686 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 13:48:45 -0400
Subject: [PATCH 57/66] snake case, comment style, and typos

---
 vpr/src/base/read_options.cpp      | 46 +++++++++++++--------------
 vpr/src/base/read_options.h        | 51 +++++++++++++++---------------
 vpr/src/base/setup_vpr.cpp         | 20 ++++++------
 vpr/src/base/vpr_api.cpp           |  3 +-
 vpr/src/pack/verify_clustering.cpp |  2 +-
 vpr/src/place/place.cpp            | 29 ++++++++---------
 6 files changed, 75 insertions(+), 76 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 57c449b6d26..401f6f09fc4 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2177,7 +2177,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
 
     auto& place_grp = parser.add_argument_group("placement options");
 
-    place_grp.add_argument(args.Seed, "--seed")
+    place_grp.add_argument(args.seed, "--seed")
         .help("Placement random number generator seed")
         .default_value("1")
         .show_in(argparse::ShowIn::HELP_ONLY);
@@ -2195,7 +2195,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("astar")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceInnerNum, "--inner_num")
+    place_grp.add_argument(args.place_inner_num, "--inner_num")
         .help("Controls number of moves per temperature: inner_num * num_blocks ^ (4/3)")
         .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
@@ -2226,17 +2226,17 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("1.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceInitT, "--init_t")
+    place_grp.add_argument(args.place_init_t, "--init_t")
         .help("Initial temperature for manual annealing schedule")
         .default_value("100.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceExitT, "--exit_t")
+    place_grp.add_argument(args.place_exit_t, "--exit_t")
         .help("Temperature at which annealing which terminate for manual annealing schedule")
         .default_value("0.01")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceAlphaT, "--alpha_t")
+    place_grp.add_argument(args.place_alpha_t, "--alpha_t")
         .help(
             "Temperature scaling factor for manual annealing schedule."
             " Old temperature is multiplied by alpha_t")
@@ -2259,7 +2259,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.PlaceAlgorithm, "--place_algorithm")
+    place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.place_algorithm, "--place_algorithm")
         .help(
             "Controls which placement algorithm is used. Valid options:\n"
             " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n"
@@ -2269,7 +2269,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .choices({"bounding_box", "criticality_timing", "slack_timing"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.PlaceQuenchAlgorithm, "--place_quench_algorithm")
+    place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.place_quench_algorithm, "--place_quench_algorithm")
         .help(
             "Controls which placement algorithm is used during placement quench.\n"
             "If specified, it overrides the option --place_algorithm during placement quench.\n"
@@ -2281,7 +2281,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .choices({"bounding_box", "criticality_timing", "slack_timing"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width")
+    place_grp.add_argument(args.place_chan_width, "--place_chan_width")
         .help(
             "Sets the assumed channel width during placement. "
             "If --place_chan_width is unspecified, but --route_chan_width is specified the "
@@ -2483,14 +2483,14 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
 
     auto& place_timing_grp = parser.add_argument_group("timing-driven placement options");
 
-    place_timing_grp.add_argument(args.PlaceTimingTradeoff, "--timing_tradeoff")
+    place_timing_grp.add_argument(args.place_timing_tradeoff, "--timing_tradeoff")
         .help(
             "Trade-off control between delay and wirelength during placement."
             " 0.0 focuses completely on wirelength, 1.0 completely on timing")
         .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter")
+    place_timing_grp.add_argument(args.recompute_crit_iter, "--recompute_crit_iter")
         .help("Controls how many temperature updates occur between timing analysis during placement")
         .default_value("1")
         .show_in(argparse::ShowIn::HELP_ONLY);
@@ -3449,11 +3449,11 @@ void set_conditional_defaults(t_options& args) {
      */
 
     //Which placement algorithm to use?
-    if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) {
+    if (args.place_algorithm.provenance() != Provenance::SPECIFIED) {
         if (args.timing_analysis) {
-            args.PlaceAlgorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED);
+            args.place_algorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED);
         } else {
-            args.PlaceAlgorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED);
+            args.place_algorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED);
         }
     }
 
@@ -3467,7 +3467,7 @@ void set_conditional_defaults(t_options& args) {
     // Check for correct options combinations
     // If you are running WLdriven placement, the RL reward function should be
     // either basic or nonPenalizing basic
-    if (args.RL_agent_placement && (args.PlaceAlgorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) {
+    if (args.RL_agent_placement && (args.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) {
         if (args.place_reward_fun.value() != "basic" && args.place_reward_fun.value() != "nonPenalizing_basic") {
             VTR_LOG_WARN(
                 "To use RLPlace for WLdriven placements, the reward function should be basic or nonPenalizing_basic.\n"
@@ -3478,18 +3478,18 @@ void set_conditional_defaults(t_options& args) {
     }
 
     //Which placement algorithm to use during placement quench?
-    if (args.PlaceQuenchAlgorithm.provenance() != Provenance::SPECIFIED) {
-        args.PlaceQuenchAlgorithm.set(args.PlaceAlgorithm, Provenance::INFERRED);
+    if (args.place_quench_algorithm.provenance() != Provenance::SPECIFIED) {
+        args.place_quench_algorithm.set(args.place_algorithm, Provenance::INFERRED);
     }
 
     //Place chan width follows Route chan width if unspecified
-    if (args.PlaceChanWidth.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) {
-        args.PlaceChanWidth.set(args.RouteChanWidth.value(), Provenance::INFERRED);
+    if (args.place_chan_width.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) {
+        args.place_chan_width.set(args.RouteChanWidth.value(), Provenance::INFERRED);
     }
 
     //Do we calculate timing info during placement?
-    if (args.ShowPlaceTiming.provenance() != Provenance::SPECIFIED) {
-        args.ShowPlaceTiming.set(args.timing_analysis, Provenance::INFERRED);
+    if (args.show_place_timing.provenance() != Provenance::SPECIFIED) {
+        args.show_place_timing.set(args.timing_analysis, Provenance::INFERRED);
     }
 
     //Slave quench recompute divider of inner loop recompute divider unless specified
@@ -3498,9 +3498,9 @@ void set_conditional_defaults(t_options& args) {
     }
 
     //Which schedule?
-    if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule
-        || args.PlaceExitT.provenance() == Provenance::SPECIFIED
-        || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) {
+    if (args.place_init_t.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule
+        || args.place_exit_t.provenance() == Provenance::SPECIFIED
+        || args.place_alpha_t.provenance() == Provenance::SPECIFIED) {
         args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED);
     } else {
         args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index f846867af77..ece3e391629 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -9,7 +9,7 @@
 #include "argparse.hpp"
 
 struct t_options {
-    /* File names */
+    // File names
     argparse::ArgValue<std::string> ArchFile;
     argparse::ArgValue<std::string> CircuitName;
     argparse::ArgValue<std::string> NetFile;
@@ -49,7 +49,7 @@ struct t_options {
 
     argparse::ArgValue<std::string> write_block_usage;
 
-    /* Stage Options */
+    // Stage Options
     argparse::ArgValue<bool> do_packing;
     argparse::ArgValue<bool> do_legalize;
     argparse::ArgValue<bool> do_placement;
@@ -58,13 +58,13 @@ struct t_options {
     argparse::ArgValue<bool> do_analysis;
     argparse::ArgValue<bool> do_power;
 
-    /* Graphics Options */
+    // Graphics Options
     argparse::ArgValue<bool> show_graphics; ///<Enable argparse::ArgValue<int>eractive graphics?
     argparse::ArgValue<int> GraphPause;
     argparse::ArgValue<bool> save_graphics;
     argparse::ArgValue<std::string> graphics_commands;
 
-    /* General options */
+    // General options
     argparse::ArgValue<bool> show_help;
     argparse::ArgValue<bool> show_version;
     argparse::ArgValue<bool> show_arch_resources;
@@ -86,11 +86,11 @@ struct t_options {
     argparse::ArgValue<bool> allow_dangling_combinational_nodes;
     argparse::ArgValue<bool> terminate_if_timing_fails;
 
-    /* Server options */
+    // Server options
     argparse::ArgValue<bool> is_server_mode_enabled;
     argparse::ArgValue<int> server_port_num;
 
-    /* Atom netlist options */
+    // Atom netlist options
     argparse::ArgValue<bool> absorb_buffer_luts;
     argparse::ArgValue<e_const_gen_inference> const_gen_inference;
     argparse::ArgValue<bool> sweep_dangling_primary_ios;
@@ -99,7 +99,7 @@ struct t_options {
     argparse::ArgValue<bool> sweep_constant_primary_outputs;
     argparse::ArgValue<int> netlist_verbosity;
 
-    /* Analytical Placement options */
+    // Analytical Placement options
     argparse::ArgValue<e_ap_analytical_solver> ap_analytical_solver;
     argparse::ArgValue<e_ap_partial_legalizer> ap_partial_legalizer;
     argparse::ArgValue<e_ap_full_legalizer> ap_full_legalizer;
@@ -111,7 +111,7 @@ struct t_options {
     argparse::ArgValue<int> ap_high_fanout_threshold;
     argparse::ArgValue<bool> ap_generate_mass_report;
 
-    /* Clustering options */
+    // Clustering options
     argparse::ArgValue<bool> connection_driven_clustering;
     argparse::ArgValue<e_unrelated_clustering> allow_unrelated_clustering;
     argparse::ArgValue<float> timing_gain_weight;
@@ -126,19 +126,20 @@ struct t_options {
     argparse::ArgValue<int> pack_feasible_block_array_size;
     argparse::ArgValue<std::vector<std::string>> pack_high_fanout_threshold;
     argparse::ArgValue<int> pack_verbosity;
-    /* Placement options */
-    argparse::ArgValue<int> Seed;
-    argparse::ArgValue<bool> ShowPlaceTiming;
-    argparse::ArgValue<float> PlaceInnerNum;
+
+    // Placement options
+    argparse::ArgValue<int> seed;
+    argparse::ArgValue<bool> show_place_timing;
+    argparse::ArgValue<float> place_inner_num;
     argparse::ArgValue<float> place_auto_init_t_scale;
-    argparse::ArgValue<float> PlaceInitT;
-    argparse::ArgValue<float> PlaceExitT;
-    argparse::ArgValue<float> PlaceAlphaT;
+    argparse::ArgValue<float> place_init_t;
+    argparse::ArgValue<float> place_exit_t;
+    argparse::ArgValue<float> place_alpha_t;
     argparse::ArgValue<e_sched_type> anneal_sched_type;
-    argparse::ArgValue<e_place_algorithm> PlaceAlgorithm;
-    argparse::ArgValue<e_place_algorithm> PlaceQuenchAlgorithm;
+    argparse::ArgValue<e_place_algorithm> place_algorithm;
+    argparse::ArgValue<e_place_algorithm> place_quench_algorithm;
     argparse::ArgValue<e_pad_loc_type> pad_loc_type;
-    argparse::ArgValue<int> PlaceChanWidth;
+    argparse::ArgValue<int> place_chan_width;
     argparse::ArgValue<float> place_rlim_escape_fraction;
     argparse::ArgValue<std::string> place_move_stats_file;
     argparse::ArgValue<int> placement_saves_per_temperature;
@@ -167,7 +168,7 @@ struct t_options {
     argparse::ArgValue<int> placer_debug_block;
     argparse::ArgValue<int> placer_debug_net;
 
-    /*NoC Options*/
+    // NoC Options
     argparse::ArgValue<bool> noc;
     argparse::ArgValue<std::string> noc_flows_file;
     argparse::ArgValue<std::string> noc_routing_algorithm;
@@ -185,9 +186,9 @@ struct t_options {
     argparse::ArgValue<bool> noc_sat_routing_log_search_progress;
     argparse::ArgValue<std::string> noc_placement_file_name;
 
-    /* Timing-driven placement options only */
-    argparse::ArgValue<float> PlaceTimingTradeoff;
-    argparse::ArgValue<int> RecomputeCritIter;
+    // Timing-driven placement options only
+    argparse::ArgValue<float> place_timing_tradeoff;
+    argparse::ArgValue<int> recompute_crit_iter;
     argparse::ArgValue<int> inner_loop_recompute_divider;
     argparse::ArgValue<int> quench_recompute_divider;
     argparse::ArgValue<float> place_exp_first;
@@ -202,7 +203,7 @@ struct t_options {
     argparse::ArgValue<e_reducer> place_delay_model_reducer;
     argparse::ArgValue<std::string> allowed_tiles_for_delay_model;
 
-    /* Router Options */
+    // Router Options
     argparse::ArgValue<bool> check_rr_graph;
     argparse::ArgValue<int> max_router_iterations;
     argparse::ArgValue<float> first_iter_pres_fac;
@@ -232,7 +233,7 @@ struct t_options {
     argparse::ArgValue<int> route_verbosity;
     argparse::ArgValue<int> custom_3d_sb_fanin_fanout;
 
-    /* Timing-driven router options only */
+    // Timing-driven router options only
     argparse::ArgValue<float> astar_fac;
     argparse::ArgValue<float> astar_offset;
     argparse::ArgValue<float> router_profiler_astar_fac;
@@ -267,7 +268,7 @@ struct t_options {
     argparse::ArgValue<e_router_initial_timing> router_initial_timing;
     argparse::ArgValue<e_heap_type> router_heap;
 
-    /* Analysis options */
+    // Analysis options
     argparse::ArgValue<bool> full_stats;
     argparse::ArgValue<bool> Generate_Post_Synthesis_Netlist;
     argparse::ArgValue<bool> Generate_Post_Implementation_Merged_Netlist;
diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp
index 4148bdeac4b..8f3b6f0b4c8 100644
--- a/vpr/src/base/setup_vpr.cpp
+++ b/vpr/src/base/setup_vpr.cpp
@@ -542,17 +542,17 @@ static void setup_router_opts(const t_options& Options, t_router_opts* RouterOpt
 
 static void setup_anneal_sched(const t_options& Options,
                                t_annealing_sched* AnnealSched) {
-    AnnealSched->alpha_t = Options.PlaceAlphaT;
+    AnnealSched->alpha_t = Options.place_alpha_t;
     if (AnnealSched->alpha_t >= 1 || AnnealSched->alpha_t <= 0) {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_t must be between 0 and 1 exclusive.\n");
     }
 
-    AnnealSched->exit_t = Options.PlaceExitT;
+    AnnealSched->exit_t = Options.place_exit_t;
     if (AnnealSched->exit_t <= 0) {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "exit_t must be greater than 0.\n");
     }
 
-    AnnealSched->init_t = Options.PlaceInitT;
+    AnnealSched->init_t = Options.place_init_t;
     if (AnnealSched->init_t <= 0) {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater than 0.\n");
     }
@@ -561,7 +561,7 @@ static void setup_anneal_sched(const t_options& Options,
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater or equal to than exit_t.\n");
     }
 
-    AnnealSched->inner_num = Options.PlaceInnerNum;
+    AnnealSched->inner_num = Options.place_inner_num;
     if (AnnealSched->inner_num <= 0) {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n");
     }
@@ -657,8 +657,8 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
 
     PlacerOpts->td_place_exp_last = Options.place_exp_last;
 
-    PlacerOpts->place_algorithm = Options.PlaceAlgorithm;
-    PlacerOpts->place_quench_algorithm = Options.PlaceQuenchAlgorithm;
+    PlacerOpts->place_algorithm = Options.place_algorithm;
+    PlacerOpts->place_quench_algorithm = Options.place_quench_algorithm;
 
     PlacerOpts->constraints_file = Options.constraints_file;
 
@@ -668,11 +668,11 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
 
     PlacerOpts->pad_loc_type = Options.pad_loc_type;
 
-    PlacerOpts->place_chan_width = Options.PlaceChanWidth;
+    PlacerOpts->place_chan_width = Options.place_chan_width;
 
-    PlacerOpts->recompute_crit_iter = Options.RecomputeCritIter;
+    PlacerOpts->recompute_crit_iter = Options.recompute_crit_iter;
 
-    PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff;
+    PlacerOpts->timing_tradeoff = Options.place_timing_tradeoff;
 
     /* Depends on PlacerOpts->place_algorithm */
     PlacerOpts->delay_offset = Options.place_delay_offset;
@@ -721,7 +721,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
     PlacerOpts->floorplan_num_vertical_partitions = Options.floorplan_num_vertical_partitions;
     PlacerOpts->place_quench_only = Options.place_quench_only;
 
-    PlacerOpts->seed = Options.Seed;
+    PlacerOpts->seed = Options.seed;
 
     PlacerOpts->placer_debug_block = Options.placer_debug_block;
     PlacerOpts->placer_debug_net = Options.placer_debug_net;
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index 7ba9580ef9d..d3a8ec901c3 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -753,7 +753,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) {
     // constraints with the new information.
     g_vpr_ctx.mutable_floorplanning().update_floorplanning_context_post_pack();
 
-    /* Sanity check the resulting netlist */
+    // Sanity check the resulting netlist
     check_netlist(vpr_setup.PackerOpts.pack_verbosity);
 
     // Independently verify the clustering to ensure the clustering can be
@@ -887,7 +887,6 @@ void vpr_place(const Netlist<>& net_list,
               g_vpr_ctx.atom().flat_placement_info(),
               is_flat);
 
-    g_vpr_ctx.mutable_floorplanning().clean_floorplanning_context_post_place();
 
     auto& filename_opts = vpr_setup.FileNameOpts;
     auto& cluster_ctx = g_vpr_ctx.clustering();
diff --git a/vpr/src/pack/verify_clustering.cpp b/vpr/src/pack/verify_clustering.cpp
index ec08e10a40b..93f925ef68b 100644
--- a/vpr/src/pack/verify_clustering.cpp
+++ b/vpr/src/pack/verify_clustering.cpp
@@ -406,7 +406,7 @@ unsigned verify_clustering(const ClusteredNetlist& clb_nlist,
         // Return here since this error can cause serious issues below.
         return num_errors;
     }
-    // Check conssitency between which clusters the atom's think thet are in and
+    // Check consistency between which clusters the atoms think they are in and
     // which atoms the clusters think they have.
     num_errors += check_clustering_atom_consistency(clb_nlist,
                                                     atom_nlist,
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 6f2ebb89e0f..467dbed31f9 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -59,16 +59,16 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? "Cube" : "Per-layer"));
     VTR_LOG("\n");
 
-    /* To make sure the importance of NoC-related cost terms compared to
-     * BB and timing cost is determine only through NoC placement weighting factor,
-     * we normalize NoC-related cost weighting factors so that they add up to 1.
-     * With this normalization, NoC-related cost weighting factors only determine
-     * the relative importance of NoC cost terms with respect to each other, while
-     * the importance of total NoC cost to conventional placement cost is determined
-     * by NoC placement weighting factor.
-     * FIXME: This should not be modifying the NoC Opts here, this normalization
-     *        should occur when these Opts are loaded in.
-     */
+
+    // To make sure the importance of NoC-related cost terms compared to
+    // BB and timing cost is determined only through NoC placement weighting factor,
+    // we normalize NoC-related cost weighting factors so that they add up to 1.
+    // With this normalization, NoC-related cost weighting factors only determine
+    // the relative importance of NoC cost terms with respect to each other, while
+    // the importance of total NoC cost to conventional placement cost is determined
+    // by NoC placement weighting factor.
+    // FIXME: This should not be modifying the NoC Opts here, this normalization
+    //        should occur when these Opts are loaded in.
     if (noc_opts.noc) {
         normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts));
     }
@@ -94,10 +94,9 @@ void try_place(const Netlist<>& net_list,
         }
     }
 
-    /* Make the global instance of BlkLocRegistry inaccessible through the getter methods of the
-     * placement context. This is done to make sure that the placement stage only accesses its
-     * own local instances of BlkLocRegistry.
-     */
+    // Make the global instance of BlkLocRegistry inaccessible through the getter methods of the
+    // placement context. This is done to make sure that the placement stage only accesses its
+    // own local instances of BlkLocRegistry.
     mutable_placement.lock_loc_vars();
 
     // Start measuring placement time. The measured execution time will be printed
@@ -150,7 +149,7 @@ static void update_screen_debug();
 
 //Performs a major (i.e. interactive) placement screen update.
 //This function with no arguments is useful for calling from a debugger to
-//look at the intermediate implemetnation state.
+//look at the intermediate implementation state.
 static void update_screen_debug() {
     update_screen(ScreenUpdatePriority::MAJOR, "DEBUG", PLACEMENT, nullptr);
 }

From b8604c6f058aaabab114bea692576f7c1315f59a Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 13:49:16 -0400
Subject: [PATCH 58/66] call update_floorplanning_context_post_pack() at the
 start of placement if cluster_constraints.empty()

---
 vpr/src/base/place_and_route.cpp | 2 --
 vpr/src/place/place.cpp          | 5 +++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index b7145f3ef09..138d411539d 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -360,8 +360,6 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
         }
     }
 
-    g_vpr_ctx.mutable_floorplanning().clean_floorplanning_context_post_place();
-
     // End binary search verification.
     // Restore the best placement (if necessary), the best routing, and the
     // best channel widths for final drawing and statistics output.
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 467dbed31f9..6133c89fff1 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -51,6 +51,10 @@ void try_place(const Netlist<>& net_list,
     // Initialize the variables in the placement context.
     mutable_placement.init_placement_context(placer_opts, directs);
 
+    if (mutable_floorplanning.cluster_constraints.empty()) {
+        mutable_floorplanning.update_floorplanning_context_post_pack();
+    }
+
     // Update the floorplanning constraints with the macro information from the
     // placement context.
     mutable_floorplanning.update_floorplanning_context_pre_place(*mutable_placement.place_macros);
@@ -123,6 +127,7 @@ void try_place(const Netlist<>& net_list,
     // used by variables which were allocated in the placement context and are
     // never used outside of placement.
     mutable_placement.clean_placement_context_post_place();
+    mutable_floorplanning.clean_floorplanning_context_post_place();
 }
 
 #ifdef VERBOSE

From 88b8c1030f29c3e62439f4293c2439cbec47f991 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 15:21:46 -0400
Subject: [PATCH 59/66] enum class e_place_freq

---
 vpr/src/base/ShowSetup.cpp       | 15 +++++++--------
 vpr/src/base/place_and_route.cpp |  8 ++++----
 vpr/src/base/setup_vpr.cpp       |  2 +-
 vpr/src/base/vpr_types.h         | 16 +++++++++-------
 4 files changed, 21 insertions(+), 20 deletions(-)

diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index 2e8d36e5f2c..0a10a017772 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -495,20 +495,19 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) {
 static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) {
     VTR_LOG("PlacerOpts.place_freq: ");
     switch (PlacerOpts.place_freq) {
-        case PLACE_ONCE:
-            VTR_LOG("PLACE_ONCE\n");
+        case e_place_freq::ONCE:
+            VTR_LOG("ONCE\n");
             break;
-        case PLACE_ALWAYS:
-            VTR_LOG("PLACE_ALWAYS\n");
+        case e_place_freq::ALWAYS:
+            VTR_LOG("ALWAYS\n");
             break;
-        case PLACE_NEVER:
-            VTR_LOG("PLACE_NEVER\n");
+        case e_place_freq::NEVER:
+            VTR_LOG("NEVER\n");
             break;
         default:
             VTR_LOG_ERROR("Unknown Place Freq\n");
     }
-    if ((PLACE_ONCE == PlacerOpts.place_freq)
-        || (PLACE_ALWAYS == PlacerOpts.place_freq)) {
+    if (PlacerOpts.place_freq == e_place_freq::ONCE || PlacerOpts.place_freq == e_place_freq::ALWAYS) {
         VTR_LOG("PlacerOpts.place_algorithm: ");
         switch (PlacerOpts.place_algorithm.get()) {
             case e_place_algorithm::BOUNDING_BOX_PLACE:
diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp
index 138d411539d..6381eb39dc3 100644
--- a/vpr/src/base/place_and_route.cpp
+++ b/vpr/src/base/place_and_route.cpp
@@ -167,7 +167,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
             break;
         }
 
-        if (placer_opts.place_freq == PLACE_ALWAYS) {
+        if (placer_opts.place_freq == e_place_freq::ALWAYS) {
             placer_opts.place_chan_width = current;
             try_place(placement_net_list,
                       placer_opts,
@@ -312,7 +312,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
             fflush(stdout);
             if (current < 1)
                 break;
-            if (placer_opts.place_freq == PLACE_ALWAYS) {
+            if (placer_opts.place_freq == e_place_freq::ALWAYS) {
                 placer_opts.place_chan_width = current;
                 try_place(placement_net_list, placer_opts, router_opts, analysis_opts, noc_opts,
                           arch->Chans, det_routing_arch, segment_inf,
@@ -341,7 +341,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list,
                              route_ctx.clb_opins_used_locally,
                              saved_clb_opins_used_locally);
 
-                if (placer_opts.place_freq == PLACE_ALWAYS) {
+                if (placer_opts.place_freq == e_place_freq::ALWAYS) {
                     auto& cluster_ctx = g_vpr_ctx.clustering();
                     // Cluster-based net_list is used for placement
                     std::string placement_id = print_place(filename_opts.NetFile.c_str(), cluster_ctx.clb_nlist.netlist_id().c_str(),
@@ -417,7 +417,7 @@ t_chan_width setup_chan_width(const t_router_opts& router_opts,
     if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) {
         auto& device_ctx = g_vpr_ctx.device();
 
-        auto type = find_most_common_tile_type(device_ctx.grid);
+        t_physical_tile_type_ptr type = find_most_common_tile_type(device_ctx.grid);
 
         width_fac = 4 * type->num_pins;
         // this is 2x the value that binary search starts
diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp
index 8f3b6f0b4c8..09728e969bb 100644
--- a/vpr/src/base/setup_vpr.cpp
+++ b/vpr/src/base/setup_vpr.cpp
@@ -683,7 +683,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
     PlacerOpts->delay_model_type = Options.place_delay_model;
     PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer;
 
-    PlacerOpts->place_freq = PLACE_ALWAYS; /* DEFAULT */
+    PlacerOpts->place_freq = e_place_freq::ALWAYS; /* DEFAULT */
 
     PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file;
 
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 41da9a6d085..726b8c3e02d 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -375,23 +375,25 @@ constexpr int NUM_PL_MOVE_TYPES = 7;
 constexpr int NUM_PL_NONTIMING_MOVE_TYPES = 3;
 
 /* Timing data structures end */
+
+// Annealing schedule
 enum class e_sched_type {
     AUTO_SCHED,
     USER_SCHED
 };
-/* Annealing schedule */
 
+
+// What's on screen?
 enum pic_type {
     NO_PICTURE,
     PLACEMENT,
     ROUTING
 };
-/* What's on screen? */
 
-enum pfreq {
-    PLACE_NEVER,
-    PLACE_ONCE,
-    PLACE_ALWAYS
+enum class e_place_freq {
+    NEVER,
+    ONCE,
+    ALWAYS
 };
 
 ///@brief  Power data for t_netlist structure
@@ -1032,7 +1034,7 @@ struct t_placer_opts {
     std::string constraints_file;
     std::string write_initial_place_file;
     std::string read_initial_place_file;
-    enum pfreq place_freq;
+    e_place_freq place_freq;
     int recompute_crit_iter;
     int inner_loop_recompute_divider;
     int quench_recompute_divider;

From eff6743a986bbe1462720d7627e7ce7e770658b0 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 15:35:32 -0400
Subject: [PATCH 60/66] remove e_place_freq::NEVER

---
 vpr/src/base/ShowSetup.cpp | 3 ---
 vpr/src/base/vpr_types.h   | 1 -
 2 files changed, 4 deletions(-)

diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp
index 0a10a017772..858bd3198ce 100644
--- a/vpr/src/base/ShowSetup.cpp
+++ b/vpr/src/base/ShowSetup.cpp
@@ -501,9 +501,6 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) {
         case e_place_freq::ALWAYS:
             VTR_LOG("ALWAYS\n");
             break;
-        case e_place_freq::NEVER:
-            VTR_LOG("NEVER\n");
-            break;
         default:
             VTR_LOG_ERROR("Unknown Place Freq\n");
     }
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index 726b8c3e02d..e9c49f4f281 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -391,7 +391,6 @@ enum pic_type {
 };
 
 enum class e_place_freq {
-    NEVER,
     ONCE,
     ALWAYS
 };

From 4192107a41b750c72af372403f195427a6ee4ecb Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 15:36:24 -0400
Subject: [PATCH 61/66] add --place_frequency to read_options

---
 vpr/src/base/read_options.cpp | 37 +++++++++++++++++++++++++++++++++++
 vpr/src/base/read_options.h   |  1 +
 vpr/src/base/setup_vpr.cpp    |  2 +-
 3 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 401f6f09fc4..eb1daa4063d 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -618,6 +618,37 @@ struct ParsePlaceBoundingBox {
     }
 };
 
+struct ParsePlacementFreq { // String <-> e_place_freq converter, passed as the converter type for --place_frequency
+    ConvertedValue<e_place_freq> from_str(const std::string& str) { // "once" -> ONCE, "always" -> ALWAYS, anything else -> error
+        ConvertedValue<e_place_freq> conv_value;
+        if (str == "once") {
+            conv_value.set_value(e_place_freq::ONCE);
+        } else if (str == "always") {
+            conv_value.set_value(e_place_freq::ALWAYS);
+        } else { // Unrecognized text: store an error message that lists default_choices()
+            std::stringstream msg;
+            msg << "Invalid conversion from '" << str << "' to e_place_freq (expected one of: " << argparse::join(default_choices(), ", ") << ")";
+            conv_value.set_error(msg.str());
+        }
+        return conv_value;
+    }
+
+    ConvertedValue<std::string> to_str(e_place_freq val) { // Inverse of from_str for the two supported values
+        ConvertedValue<std::string> conv_value;
+        if (val == e_place_freq::ONCE) {
+            conv_value.set_value("once");
+        } else { // ONCE was handled above, so ALWAYS is the only value expected here
+            VTR_ASSERT(val == e_place_freq::ALWAYS);
+            conv_value.set_value("always");
+        }
+        return conv_value;
+    }
+
+    std::vector<std::string> default_choices() { // The strings from_str accepts; also quoted in its error message
+        return {"once", "always"};
+    }
+};
+
 struct ParsePlaceAgentAlgorithm {
     ConvertedValue<e_agent_algorithm> from_str(const std::string& str) {
         ConvertedValue<e_agent_algorithm> conv_value;
@@ -2343,6 +2374,12 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .choices({"auto_bb", "cube_bb", "per_layer_bb"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
+    place_grp.add_argument<e_place_freq, ParsePlacementFreq>(args.place_placement_freq, "--place_frequency")
+        .help("Run placement every time or only once during channel width search.")
+        .default_value("once")
+        .choices({"once", "always"}) // One entry per accepted value; must match ParsePlacementFreq::default_choices()
+        .show_in(argparse::ShowIn::HELP_ONLY);
+
     place_grp.add_argument<bool, ParseOnOff>(args.RL_agent_placement, "--RL_agent_placement")
         .help(
             "Uses a Reinforcement Learning (RL) agent in choosing the appropriate move type in placement."
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index ece3e391629..d364354ecfc 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -148,6 +148,7 @@ struct t_options {
     argparse::ArgValue<std::vector<float>> place_static_move_prob;
     argparse::ArgValue<int> place_high_fanout_net;
     argparse::ArgValue<e_place_bounding_box_mode> place_bounding_box_mode;
+    argparse::ArgValue<e_place_freq> place_placement_freq;
 
     argparse::ArgValue<bool> RL_agent_placement;
     argparse::ArgValue<bool> place_agent_multistate;
diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp
index 09728e969bb..f010181d82e 100644
--- a/vpr/src/base/setup_vpr.cpp
+++ b/vpr/src/base/setup_vpr.cpp
@@ -683,7 +683,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
     PlacerOpts->delay_model_type = Options.place_delay_model;
     PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer;
 
-    PlacerOpts->place_freq = e_place_freq::ALWAYS; /* DEFAULT */
+    PlacerOpts->place_freq = Options.place_placement_freq;
 
     PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file;
 

From 7c3210094495b80ef2b02c71d77aa68b941a2ebb Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 16:00:11 -0400
Subject: [PATCH 62/66] update command_line_usage.rst to add --place_frequency

---
 doc/src/vpr/command_line_usage.rst | 13 ++++++++++++-
 vpr/src/base/read_options.cpp      |  6 +++---
 2 files changed, 15 insertions(+), 4 deletions(-)

diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst
index b3d482e048a..fe1b46f83b6 100644
--- a/doc/src/vpr/command_line_usage.rst
+++ b/doc/src/vpr/command_line_usage.rst
@@ -907,6 +907,16 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe
 
     **Default:** ``auto_bb``
 
+.. option:: --place_frequency {once | always}
+
+    Specifies how often placement is performed during the minimum channel width search.
+
+    ``once``: Placement is run only once at the beginning of the channel width search. This reduces runtime but may not benefit from congestion-aware optimizations.
+
+    ``always``: Placement is rerun for each channel width trial. This might improve routability at the cost of increased runtime.
+
+    **Default:** ``once``
+
 .. option:: --place_chan_width <int>
 
     Tells VPR how many tracks a channel of relative width 1 is expected to need to complete routing of this circuit.
@@ -1869,6 +1879,7 @@ The following options are only valid when the router is in timing-driven mode (t
     **Default:** ``0.5``
 
 .. option:: --router_initial_acc_cost_chan_congestion_weight <float>
+
     Weight applied to the excess channel utilization (above threshold) when computing the initial accumulated cost (acc_cost)of routing resources.
 
     Higher values make the router more sensitive to early congestion.
@@ -1907,7 +1918,7 @@ The following options are only valid when the router is in timing-driven mode (t
 
 .. option:: --router_first_iter_timing_report <file>
 
-    Name of the timing report file to generate after the first routing iteration completes (not generated if unspecfied).
+    Name of the timing report file to generate after the first routing iteration completes (not generated if unspecified).
 
 .. option:: --router_debug_net <int>
 
diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index eb1daa4063d..6d013d8b9ed 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2365,9 +2365,9 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
             "Specifies the type of bounding box to be used in 3D architectures.\n"
             "\n"
             "MODE options:\n"
-            "  auto_bb     : Automatically determine the appropriate bounding box based on the connections between layers.\n"
-            "  cube_bb            : Use 3D bounding boxes.\n"
-            "  per_layer_bb     : Use per-layer bounding boxes.\n"
+            "  auto_bb      : Automatically determine the appropriate bounding box based on the connections between layers.\n"
+            "  cube_bb      : Use 3D bounding boxes.\n"
+            "  per_layer_bb : Use per-layer bounding boxes.\n"
             "\n"
             "Choose one of the available modes to define the behavior of bounding boxes in your 3D architecture. The default mode is 'automatic'.")
         .default_value("auto_bb")

From 8833c79f38e33e1512fa3498faac86992147a503 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 13:48:45 -0400
Subject: [PATCH 63/66] cherry pick commits to run placement for each channel
 width trial in MRCW search

---
 vpr/src/base/read_options.cpp      | 59 +++++++++++++++---------------
 vpr/src/base/read_options.h        | 54 ++++++++++++++-------------
 vpr/src/base/setup_vpr.cpp         | 22 +++++------
 vpr/src/base/vpr_api.cpp           |  4 +-
 vpr/src/pack/verify_clustering.cpp |  2 +-
 vpr/src/place/place.cpp            | 29 +++++++--------
 6 files changed, 85 insertions(+), 85 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index cae2d51651f..912455aabfc 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2178,7 +2178,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
 
     auto& place_grp = parser.add_argument_group("placement options");
 
-    place_grp.add_argument(args.Seed, "--seed")
+    place_grp.add_argument(args.seed, "--seed")
         .help("Placement random number generator seed")
         .default_value("1")
         .show_in(argparse::ShowIn::HELP_ONLY);
@@ -2196,7 +2196,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("astar")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceInnerNum, "--inner_num")
+    place_grp.add_argument(args.place_inner_num, "--inner_num")
         .help("Controls number of moves per temperature: inner_num * num_blocks ^ (4/3)")
         .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
@@ -2227,17 +2227,17 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("1.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceInitT, "--init_t")
+    place_grp.add_argument(args.place_init_t, "--init_t")
         .help("Initial temperature for manual annealing schedule")
         .default_value("100.0")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceExitT, "--exit_t")
+    place_grp.add_argument(args.place_exit_t, "--exit_t")
         .help("Temperature at which annealing which terminate for manual annealing schedule")
         .default_value("0.01")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceAlphaT, "--alpha_t")
+    place_grp.add_argument(args.place_alpha_t, "--alpha_t")
         .help(
             "Temperature scaling factor for manual annealing schedule."
             " Old temperature is multiplied by alpha_t")
@@ -2260,7 +2260,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .default_value("")
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.PlaceAlgorithm, "--place_algorithm")
+    place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.place_algorithm, "--place_algorithm")
         .help(
             "Controls which placement algorithm is used. Valid options:\n"
             " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. Turns off timing analysis if specified.\n"
@@ -2270,7 +2270,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .choices({"bounding_box", "criticality_timing", "slack_timing"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.PlaceQuenchAlgorithm, "--place_quench_algorithm")
+    place_grp.add_argument<e_place_algorithm, ParsePlaceAlgorithm>(args.place_quench_algorithm, "--place_quench_algorithm")
         .help(
             "Controls which placement algorithm is used during placement quench.\n"
             "If specified, it overrides the option --place_algorithm during placement quench.\n"
@@ -2282,7 +2282,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .choices({"bounding_box", "criticality_timing", "slack_timing"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
-    place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width")
+    place_grp.add_argument(args.place_chan_width, "--place_chan_width")
         .help(
             "Sets the assumed channel width during placement. "
             "If --place_chan_width is unspecified, but --route_chan_width is specified the "
@@ -2484,11 +2484,12 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
 
     auto& place_timing_grp = parser.add_argument_group("timing-driven placement options");
 
-    place_timing_grp.add_argument(args.PlaceTimingTradeoff, "--timing_tradeoff")
-        .help("Trade-off control between delay and wirelength during placement. "
-              "0.0 focuses completely on wirelength, 1.0 completely on timing")
-        .default_value("0.5")
-        .show_in(argparse::ShowIn::HELP_ONLY);
+    place_timing_grp.add_argument(args.place_timing_tradeoff, "--timing_tradeoff")
+            .help(
+                "Trade-off control between delay and wirelength during placement."
+                " 0.0 focuses completely on wirelength, 1.0 completely on timing")
+            .default_value("0.5")
+            .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor")
         .help("Weighting factor for congestion cost during placement. "
@@ -2504,11 +2505,9 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.place_congestion_chan_util_threshold, "--congestion_chan_util_threshold")
-        .help("Penalizes nets in placement whose average routing channel utilization within their bounding boxes exceeds this threshold.")
-        .default_value("0.5")
-        .show_in(argparse::ShowIn::HELP_ONLY);
+        .help("Penalizes nets in placement whose average routing channel utilization within their bounding boxes exceeds this threshold.");
 
-    place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter")
+    place_timing_grp.add_argument(args.recompute_crit_iter, "--recompute_crit_iter")
         .help("Controls how many temperature updates occur between timing analysis during placement")
         .default_value("1")
         .show_in(argparse::ShowIn::HELP_ONLY);
@@ -3467,11 +3466,11 @@ void set_conditional_defaults(t_options& args) {
      */
 
     //Which placement algorithm to use?
-    if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) {
+    if (args.place_algorithm.provenance() != Provenance::SPECIFIED) {
         if (args.timing_analysis) {
-            args.PlaceAlgorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED);
+            args.place_algorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED);
         } else {
-            args.PlaceAlgorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED);
+            args.place_algorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED);
         }
     }
 
@@ -3485,7 +3484,7 @@ void set_conditional_defaults(t_options& args) {
     // Check for correct options combinations
     // If you are running WLdriven placement, the RL reward function should be
     // either basic or nonPenalizing basic
-    if (args.RL_agent_placement && (args.PlaceAlgorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) {
+    if (args.RL_agent_placement && (args.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) {
         if (args.place_reward_fun.value() != "basic" && args.place_reward_fun.value() != "nonPenalizing_basic") {
             VTR_LOG_WARN(
                 "To use RLPlace for WLdriven placements, the reward function should be basic or nonPenalizing_basic.\n"
@@ -3496,18 +3495,18 @@ void set_conditional_defaults(t_options& args) {
     }
 
     //Which placement algorithm to use during placement quench?
-    if (args.PlaceQuenchAlgorithm.provenance() != Provenance::SPECIFIED) {
-        args.PlaceQuenchAlgorithm.set(args.PlaceAlgorithm, Provenance::INFERRED);
+    if (args.place_quench_algorithm.provenance() != Provenance::SPECIFIED) {
+        args.place_quench_algorithm.set(args.place_algorithm, Provenance::INFERRED);
     }
 
     //Place chan width follows Route chan width if unspecified
-    if (args.PlaceChanWidth.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) {
-        args.PlaceChanWidth.set(args.RouteChanWidth.value(), Provenance::INFERRED);
+    if (args.place_chan_width.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) {
+        args.place_chan_width.set(args.RouteChanWidth.value(), Provenance::INFERRED);
     }
 
     //Do we calculate timing info during placement?
-    if (args.ShowPlaceTiming.provenance() != Provenance::SPECIFIED) {
-        args.ShowPlaceTiming.set(args.timing_analysis, Provenance::INFERRED);
+    if (args.show_place_timing.provenance() != Provenance::SPECIFIED) {
+        args.show_place_timing.set(args.timing_analysis, Provenance::INFERRED);
     }
 
     //Slave quench recompute divider of inner loop recompute divider unless specified
@@ -3516,9 +3515,9 @@ void set_conditional_defaults(t_options& args) {
     }
 
     //Which schedule?
-    if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule
-        || args.PlaceExitT.provenance() == Provenance::SPECIFIED
-        || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) {
+    if (args.place_init_t.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule
+        || args.place_exit_t.provenance() == Provenance::SPECIFIED
+        || args.place_alpha_t.provenance() == Provenance::SPECIFIED) {
         args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED);
     } else {
         args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index 267dd2ab8cf..c0efe2ed503 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -9,7 +9,7 @@
 #include "argparse.hpp"
 
 struct t_options {
-    /* File names */
+    // File names
     argparse::ArgValue<std::string> ArchFile;
     argparse::ArgValue<std::string> CircuitName;
     argparse::ArgValue<std::string> NetFile;
@@ -49,7 +49,7 @@ struct t_options {
 
     argparse::ArgValue<std::string> write_block_usage;
 
-    /* Stage Options */
+    // Stage Options
     argparse::ArgValue<bool> do_packing;
     argparse::ArgValue<bool> do_legalize;
     argparse::ArgValue<bool> do_placement;
@@ -58,13 +58,13 @@ struct t_options {
     argparse::ArgValue<bool> do_analysis;
     argparse::ArgValue<bool> do_power;
 
-    /* Graphics Options */
+    // Graphics Options
     argparse::ArgValue<bool> show_graphics; ///<Enable argparse::ArgValue<int>eractive graphics?
     argparse::ArgValue<int> GraphPause;
     argparse::ArgValue<bool> save_graphics;
     argparse::ArgValue<std::string> graphics_commands;
 
-    /* General options */
+    // General options
     argparse::ArgValue<bool> show_help;
     argparse::ArgValue<bool> show_version;
     argparse::ArgValue<bool> show_arch_resources;
@@ -86,11 +86,11 @@ struct t_options {
     argparse::ArgValue<bool> allow_dangling_combinational_nodes;
     argparse::ArgValue<bool> terminate_if_timing_fails;
 
-    /* Server options */
+    // Server options
     argparse::ArgValue<bool> is_server_mode_enabled;
     argparse::ArgValue<int> server_port_num;
 
-    /* Atom netlist options */
+    // Atom netlist options
     argparse::ArgValue<bool> absorb_buffer_luts;
     argparse::ArgValue<e_const_gen_inference> const_gen_inference;
     argparse::ArgValue<bool> sweep_dangling_primary_ios;
@@ -99,7 +99,7 @@ struct t_options {
     argparse::ArgValue<bool> sweep_constant_primary_outputs;
     argparse::ArgValue<int> netlist_verbosity;
 
-    /* Analytical Placement options */
+    // Analytical Placement options
     argparse::ArgValue<e_ap_analytical_solver> ap_analytical_solver;
     argparse::ArgValue<e_ap_partial_legalizer> ap_partial_legalizer;
     argparse::ArgValue<e_ap_full_legalizer> ap_full_legalizer;
@@ -111,7 +111,7 @@ struct t_options {
     argparse::ArgValue<int> ap_high_fanout_threshold;
     argparse::ArgValue<bool> ap_generate_mass_report;
 
-    /* Clustering options */
+    // Clustering options
     argparse::ArgValue<bool> connection_driven_clustering;
     argparse::ArgValue<e_unrelated_clustering> allow_unrelated_clustering;
     argparse::ArgValue<float> timing_gain_weight;
@@ -126,19 +126,20 @@ struct t_options {
     argparse::ArgValue<int> pack_feasible_block_array_size;
     argparse::ArgValue<std::vector<std::string>> pack_high_fanout_threshold;
     argparse::ArgValue<int> pack_verbosity;
-    /* Placement options */
-    argparse::ArgValue<int> Seed;
-    argparse::ArgValue<bool> ShowPlaceTiming;
-    argparse::ArgValue<float> PlaceInnerNum;
+
+    // Placement options
+    argparse::ArgValue<int> seed;
+    argparse::ArgValue<bool> show_place_timing;
+    argparse::ArgValue<float> place_inner_num;
     argparse::ArgValue<float> place_auto_init_t_scale;
-    argparse::ArgValue<float> PlaceInitT;
-    argparse::ArgValue<float> PlaceExitT;
-    argparse::ArgValue<float> PlaceAlphaT;
+    argparse::ArgValue<float> place_init_t;
+    argparse::ArgValue<float> place_exit_t;
+    argparse::ArgValue<float> place_alpha_t;
     argparse::ArgValue<e_sched_type> anneal_sched_type;
-    argparse::ArgValue<e_place_algorithm> PlaceAlgorithm;
-    argparse::ArgValue<e_place_algorithm> PlaceQuenchAlgorithm;
+    argparse::ArgValue<e_place_algorithm> place_algorithm;
+    argparse::ArgValue<e_place_algorithm> place_quench_algorithm;
     argparse::ArgValue<e_pad_loc_type> pad_loc_type;
-    argparse::ArgValue<int> PlaceChanWidth;
+    argparse::ArgValue<int> place_chan_width;
     argparse::ArgValue<float> place_rlim_escape_fraction;
     argparse::ArgValue<std::string> place_move_stats_file;
     argparse::ArgValue<int> placement_saves_per_temperature;
@@ -167,7 +168,7 @@ struct t_options {
     argparse::ArgValue<int> placer_debug_block;
     argparse::ArgValue<int> placer_debug_net;
 
-    /*NoC Options*/
+    // NoC Options
     argparse::ArgValue<bool> noc;
     argparse::ArgValue<std::string> noc_flows_file;
     argparse::ArgValue<std::string> noc_routing_algorithm;
@@ -185,13 +186,14 @@ struct t_options {
     argparse::ArgValue<bool> noc_sat_routing_log_search_progress;
     argparse::ArgValue<std::string> noc_placement_file_name;
 
-    /* Timing-driven placement options only */
-    argparse::ArgValue<float> PlaceTimingTradeoff;
+
+
+    // Timing-driven placement options only
     argparse::ArgValue<float> place_congestion_factor;
     argparse::ArgValue<float> place_congestion_rlim_trigger_ratio;
     argparse::ArgValue<float> place_congestion_chan_util_threshold;
-
-    argparse::ArgValue<int> RecomputeCritIter;
+    argparse::ArgValue<float> place_timing_tradeoff;
+    argparse::ArgValue<int> recompute_crit_iter;
     argparse::ArgValue<int> inner_loop_recompute_divider;
     argparse::ArgValue<int> quench_recompute_divider;
     argparse::ArgValue<float> place_exp_first;
@@ -206,7 +208,7 @@ struct t_options {
     argparse::ArgValue<e_reducer> place_delay_model_reducer;
     argparse::ArgValue<std::string> allowed_tiles_for_delay_model;
 
-    /* Router Options */
+    // Router Options
     argparse::ArgValue<bool> check_rr_graph;
     argparse::ArgValue<int> max_router_iterations;
     argparse::ArgValue<float> first_iter_pres_fac;
@@ -236,7 +238,7 @@ struct t_options {
     argparse::ArgValue<int> route_verbosity;
     argparse::ArgValue<int> custom_3d_sb_fanin_fanout;
 
-    /* Timing-driven router options only */
+    // Timing-driven router options only
     argparse::ArgValue<float> astar_fac;
     argparse::ArgValue<float> astar_offset;
     argparse::ArgValue<float> router_profiler_astar_fac;
@@ -271,7 +273,7 @@ struct t_options {
     argparse::ArgValue<e_router_initial_timing> router_initial_timing;
     argparse::ArgValue<e_heap_type> router_heap;
 
-    /* Analysis options */
+    // Analysis options
     argparse::ArgValue<bool> full_stats;
     argparse::ArgValue<bool> Generate_Post_Synthesis_Netlist;
     argparse::ArgValue<bool> Generate_Post_Implementation_Merged_Netlist;
diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp
index 0f973168cde..da0ce231aa6 100644
--- a/vpr/src/base/setup_vpr.cpp
+++ b/vpr/src/base/setup_vpr.cpp
@@ -560,17 +560,17 @@ static void setup_router_opts(const t_options& Options, t_router_opts* RouterOpt
 
 static void setup_anneal_sched(const t_options& Options,
                                t_annealing_sched* AnnealSched) {
-    AnnealSched->alpha_t = Options.PlaceAlphaT;
+    AnnealSched->alpha_t = Options.place_alpha_t;
     if (AnnealSched->alpha_t >= 1 || AnnealSched->alpha_t <= 0) {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_t must be between 0 and 1 exclusive.\n");
     }
 
-    AnnealSched->exit_t = Options.PlaceExitT;
+    AnnealSched->exit_t = Options.place_exit_t;
     if (AnnealSched->exit_t <= 0) {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "exit_t must be greater than 0.\n");
     }
 
-    AnnealSched->init_t = Options.PlaceInitT;
+    AnnealSched->init_t = Options.place_init_t;
     if (AnnealSched->init_t <= 0) {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater than 0.\n");
     }
@@ -579,7 +579,7 @@ static void setup_anneal_sched(const t_options& Options,
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater or equal to than exit_t.\n");
     }
 
-    AnnealSched->inner_num = Options.PlaceInnerNum;
+    AnnealSched->inner_num = Options.place_inner_num;
     if (AnnealSched->inner_num <= 0) {
         VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n");
     }
@@ -655,8 +655,8 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
 
     PlacerOpts->td_place_exp_last = Options.place_exp_last;
 
-    PlacerOpts->place_algorithm = Options.PlaceAlgorithm;
-    PlacerOpts->place_quench_algorithm = Options.PlaceQuenchAlgorithm;
+    PlacerOpts->place_algorithm = Options.place_algorithm;
+    PlacerOpts->place_quench_algorithm = Options.place_quench_algorithm;
 
     PlacerOpts->constraints_file = Options.constraints_file;
 
@@ -666,11 +666,11 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
 
     PlacerOpts->pad_loc_type = Options.pad_loc_type;
 
-    PlacerOpts->place_chan_width = Options.PlaceChanWidth;
+    PlacerOpts->place_chan_width = Options.place_chan_width;
 
-    PlacerOpts->recompute_crit_iter = Options.RecomputeCritIter;
-
-    PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff;
+    PlacerOpts->recompute_crit_iter = Options.recompute_crit_iter;
+    
+    PlacerOpts->timing_tradeoff = Options.place_timing_tradeoff;
     PlacerOpts->congestion_factor = Options.place_congestion_factor;
     PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio;
     PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold;
@@ -722,7 +722,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
     PlacerOpts->floorplan_num_vertical_partitions = Options.floorplan_num_vertical_partitions;
     PlacerOpts->place_quench_only = Options.place_quench_only;
 
-    PlacerOpts->seed = Options.Seed;
+    PlacerOpts->seed = Options.seed;
 
     PlacerOpts->placer_debug_block = Options.placer_debug_block;
     PlacerOpts->placer_debug_net = Options.placer_debug_net;
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index dcd0d2394c9..f76a760b207 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -753,7 +753,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) {
     // constraints with the new information.
     g_vpr_ctx.mutable_floorplanning().update_floorplanning_context_post_pack();
 
-    /* Sanity check the resulting netlist */
+    // Sanity check the resulting netlist
     check_netlist(vpr_setup.PackerOpts.pack_verbosity);
 
     // Independently verify the clusterings to ensure the clustering can be
@@ -886,7 +886,7 @@ void vpr_place(const Netlist<>& net_list,
               arch.directs,
               g_vpr_ctx.atom().flat_placement_info(),
               is_flat);
-
+    
     auto& filename_opts = vpr_setup.FileNameOpts;
     auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& block_locs = g_vpr_ctx.placement().block_locs();
diff --git a/vpr/src/pack/verify_clustering.cpp b/vpr/src/pack/verify_clustering.cpp
index ec08e10a40b..93f925ef68b 100644
--- a/vpr/src/pack/verify_clustering.cpp
+++ b/vpr/src/pack/verify_clustering.cpp
@@ -406,7 +406,7 @@ unsigned verify_clustering(const ClusteredNetlist& clb_nlist,
         // Return here since this error can cause serious issues below.
         return num_errors;
     }
-    // Check conssitency between which clusters the atom's think thet are in and
+    // Check consistency between which clusters the atoms think they are in and
     // which atoms the clusters think they have.
     num_errors += check_clustering_atom_consistency(clb_nlist,
                                                     atom_nlist,
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index c5d46b5af3f..be5a34d2eb3 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -60,16 +60,16 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? "Cube" : "Per-layer"));
     VTR_LOG("\n");
 
-    /* To make sure the importance of NoC-related cost terms compared to
-     * BB and timing cost is determine only through NoC placement weighting factor,
-     * we normalize NoC-related cost weighting factors so that they add up to 1.
-     * With this normalization, NoC-related cost weighting factors only determine
-     * the relative importance of NoC cost terms with respect to each other, while
-     * the importance of total NoC cost to conventional placement cost is determined
-     * by NoC placement weighting factor.
-     * FIXME: This should not be modifying the NoC Opts here, this normalization
-     *        should occur when these Opts are loaded in.
-     */
+
+    // To make sure the importance of NoC-related cost terms compared to
+    // BB and timing cost is determine only through NoC placement weighting factor,
+    // we normalize NoC-related cost weighting factors so that they add up to 1.
+    // With this normalization, NoC-related cost weighting factors only determine
+    // the relative importance of NoC cost terms with respect to each other, while
+    // the importance of total NoC cost to conventional placement cost is determined
+    // by NoC placement weighting factor.
+    // FIXME: This should not be modifying the NoC Opts here, this normalization
+    //        should occur when these Opts are loaded in.
     if (noc_opts.noc) {
         normalize_noc_cost_weighting_factor(const_cast<t_noc_opts&>(noc_opts));
     }
@@ -95,10 +95,9 @@ void try_place(const Netlist<>& net_list,
         }
     }
 
-    /* Make the global instance of BlkLocRegistry inaccessible through the getter methods of the
-     * placement context. This is done to make sure that the placement stage only accesses its
-     * own local instances of BlkLocRegistry.
-     */
+    // Make the global instance of BlkLocRegistry inaccessible through the getter methods of the
+    // placement context. This is done to make sure that the placement stage only accesses its
+    // own local instances of BlkLocRegistry.
     mutable_placement.lock_loc_vars();
 
     /* Start measuring placement time. The measured execution time will be printed
@@ -154,7 +153,7 @@ static void update_screen_debug();
 
 //Performs a major (i.e. interactive) placement screen update.
 //This function with no arguments is useful for calling from a debugger to
-//look at the intermediate implemetnation state.
+//look at the intermediate implementation state.
 static void update_screen_debug() {
     update_screen(ScreenUpdatePriority::MAJOR, "DEBUG", PLACEMENT, nullptr);
 }

From 12b597944d6098c9e855ecdc29db246597cfe6a4 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 16:35:59 -0400
Subject: [PATCH 64/66] make format

---
 vpr/src/base/read_options.cpp | 10 +++++-----
 vpr/src/base/read_options.h   |  2 --
 vpr/src/base/setup_vpr.cpp    |  2 +-
 vpr/src/base/vpr_api.cpp      |  2 +-
 vpr/src/place/place.cpp       |  1 -
 5 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 912455aabfc..d0d588ef424 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2485,11 +2485,11 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
     auto& place_timing_grp = parser.add_argument_group("timing-driven placement options");
 
     place_timing_grp.add_argument(args.place_timing_tradeoff, "--timing_tradeoff")
-            .help(
-                "Trade-off control between delay and wirelength during placement."
-                " 0.0 focuses completely on wirelength, 1.0 completely on timing")
-            .default_value("0.5")
-            .show_in(argparse::ShowIn::HELP_ONLY);
+        .help(
+            "Trade-off control between delay and wirelength during placement."
+            " 0.0 focuses completely on wirelength, 1.0 completely on timing")
+        .default_value("0.5")
+        .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor")
         .help("Weighting factor for congestion cost during placement. "
diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h
index c0efe2ed503..780532cb165 100644
--- a/vpr/src/base/read_options.h
+++ b/vpr/src/base/read_options.h
@@ -186,8 +186,6 @@ struct t_options {
     argparse::ArgValue<bool> noc_sat_routing_log_search_progress;
     argparse::ArgValue<std::string> noc_placement_file_name;
 
-
-
     // Timing-driven placement options only
     argparse::ArgValue<float> place_congestion_factor;
     argparse::ArgValue<float> place_congestion_rlim_trigger_ratio;
diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp
index da0ce231aa6..ecc4a1706d6 100644
--- a/vpr/src/base/setup_vpr.cpp
+++ b/vpr/src/base/setup_vpr.cpp
@@ -669,7 +669,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt
     PlacerOpts->place_chan_width = Options.place_chan_width;
 
     PlacerOpts->recompute_crit_iter = Options.recompute_crit_iter;
-    
+
     PlacerOpts->timing_tradeoff = Options.place_timing_tradeoff;
     PlacerOpts->congestion_factor = Options.place_congestion_factor;
     PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio;
diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index f76a760b207..f2eb38e3927 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -886,7 +886,7 @@ void vpr_place(const Netlist<>& net_list,
               arch.directs,
               g_vpr_ctx.atom().flat_placement_info(),
               is_flat);
-    
+
     auto& filename_opts = vpr_setup.FileNameOpts;
     auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& block_locs = g_vpr_ctx.placement().block_locs();
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index be5a34d2eb3..7d7462ed707 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -60,7 +60,6 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? "Cube" : "Per-layer"));
     VTR_LOG("\n");
 
-
     // To make sure the importance of NoC-related cost terms compared to
     // BB and timing cost is determine only through NoC placement weighting factor,
     // we normalize NoC-related cost weighting factors so that they add up to 1.

From c816b60225f01bec33d9840b7dcf2a9a286285d7 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 16:37:55 -0400
Subject: [PATCH 65/66] make format

---
 vpr/src/base/vpr_api.cpp | 1 -
 vpr/src/base/vpr_types.h | 1 -
 vpr/src/place/place.cpp  | 1 -
 3 files changed, 3 deletions(-)

diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp
index d3a8ec901c3..17c6df68327 100644
--- a/vpr/src/base/vpr_api.cpp
+++ b/vpr/src/base/vpr_api.cpp
@@ -887,7 +887,6 @@ void vpr_place(const Netlist<>& net_list,
               g_vpr_ctx.atom().flat_placement_info(),
               is_flat);
 
-
     auto& filename_opts = vpr_setup.FileNameOpts;
     auto& cluster_ctx = g_vpr_ctx.clustering();
     const auto& block_locs = g_vpr_ctx.placement().block_locs();
diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h
index e9c49f4f281..70449e6fb47 100644
--- a/vpr/src/base/vpr_types.h
+++ b/vpr/src/base/vpr_types.h
@@ -382,7 +382,6 @@ enum class e_sched_type {
     USER_SCHED
 };
 
-
 // What's on screen?
 enum pic_type {
     NO_PICTURE,
diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp
index 6133c89fff1..9e573623385 100644
--- a/vpr/src/place/place.cpp
+++ b/vpr/src/place/place.cpp
@@ -63,7 +63,6 @@ void try_place(const Netlist<>& net_list,
     VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? "Cube" : "Per-layer"));
     VTR_LOG("\n");
 
-
     // To make sure the importance of NoC-related cost terms compared to
     // BB and timing cost is determine only through NoC placement weighting factor,
     // we normalize NoC-related cost weighting factors so that they add up to 1.

From 3bf72daec0cce2a41df07b65eff7745a357102c9 Mon Sep 17 00:00:00 2001
From: Soheil Shahrouz <soheilqs@gmail.com>
Date: Mon, 21 Jul 2025 17:18:17 -0400
Subject: [PATCH 66/66] fix the issue with choices for --place_frequency

---
 vpr/src/base/read_options.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp
index 2d5b5d39ba3..e13c7768b17 100644
--- a/vpr/src/base/read_options.cpp
+++ b/vpr/src/base/read_options.cpp
@@ -2378,7 +2378,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
     place_grp.add_argument<e_place_freq, ParsePlacementFreq>(args.place_placement_freq, "--place_frequency")
         .help("Run placement every time or only once during channel width search.")
         .default_value("once")
-        .choices({"once, always"})
+        .choices({"once", "always"})
         .show_in(argparse::ShowIn::HELP_ONLY);
 
     place_grp.add_argument<bool, ParseOnOff>(args.RL_agent_placement, "--RL_agent_placement")
@@ -2527,7 +2527,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio
             " 0.0 focuses completely on wirelength, 1.0 completely on timing")
         .default_value("0.5")
         .show_in(argparse::ShowIn::HELP_ONLY);
-    
+
     place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor")
         .help("Weighting factor for congestion cost during placement. "
               "Higher values prioritize congestion avoidance over bounding box and timing costs. "