From 94fe020b9bf7b72b1855fe1395ed8cf9ced3b514 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 16 Apr 2025 13:49:43 -0400 Subject: [PATCH 01/66] add NetCostHandler::estimate_routing_chann_util() --- vpr/src/place/net_cost_handler.cpp | 42 ++++++++++++++++++++++++++++++ vpr/src/place/net_cost_handler.h | 2 ++ 2 files changed, 44 insertions(+) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 65ec74dbb47..8181787a816 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1639,6 +1639,48 @@ double NetCostHandler::get_total_wirelength_estimate() const { return estimated_wirelength; } +void NetCostHandler::estimate_routing_chann_util() const { + const auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& place_move_ctx = placer_state_.move(); + const auto& device_ctx = g_vpr_ctx.device(); + + auto chanx_occ = vtr::Matrix({{ + device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (length of x channel) + device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels) + }}, + 0); + + auto chany_occ = vtr::Matrix({{ + device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels) + device_ctx.grid.height() //[0 .. 
device_ctx.grid.height() - 1] (length of y channel) + }}, + 0); + + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + const t_bb& bb = place_move_ctx.bb_coords[net_id]; + double expected_wirelength = get_net_wirelength_estimate(net_id, bb); + int n_y_channels = bb.xmax - bb.xmin + 1; + int n_x_channels = bb.ymax - bb.ymin + 1; + + + double expected_x_wl = (double)n_x_channels / (n_x_channels + n_y_channels) * expected_wirelength; + double expected_y_wl = expected_wirelength - expected_x_wl; + + int total_channel_segments = n_y_channels * n_x_channels; + double expected_per_x_segment_wl = expected_x_wl / total_channel_segments; + double expected_per_y_segment_wl = expected_y_wl / total_channel_segments; + + for (int x = bb.xmin; x <= bb.xmax; x++) { + for (int y = bb.ymin; y <= bb.ymax; y++) { + chanx_occ[x][y] += expected_per_x_segment_wl; + chany_occ[x][y] += expected_per_y_segment_wl; + } + } + } + } +} + void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { auto& place_move_ctx = placer_state_.mutable_move(); if (cube_bb_) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 510ffa60653..cdc38f30abd 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -126,6 +126,8 @@ class NetCostHandler { */ double get_total_wirelength_estimate() const; + void estimate_routing_chann_util() const; + private: ///@brief Specifies whether the bounding box is computed using cube method or per-layer method. 
bool cube_bb_; From ee12d6d0d51f54707c1edb7e8d6fa06a668c6c35 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 16 Apr 2025 14:12:43 -0400 Subject: [PATCH 02/66] fix wl contribution in each direction estimated WL should be divided in proportion to the distance traveled in each direction --- vpr/src/place/net_cost_handler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 8181787a816..ae717609cf9 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1660,14 +1660,14 @@ void NetCostHandler::estimate_routing_chann_util() const { if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { const t_bb& bb = place_move_ctx.bb_coords[net_id]; double expected_wirelength = get_net_wirelength_estimate(net_id, bb); - int n_y_channels = bb.xmax - bb.xmin + 1; - int n_x_channels = bb.ymax - bb.ymin + 1; + int distance_x = bb.xmax - bb.xmin + 1; + int distance_y = bb.ymax - bb.ymin + 1; - double expected_x_wl = (double)n_x_channels / (n_x_channels + n_y_channels) * expected_wirelength; + double expected_x_wl = (double)distance_x / (distance_x + distance_y) * expected_wirelength; double expected_y_wl = expected_wirelength - expected_x_wl; - int total_channel_segments = n_y_channels * n_x_channels; + int total_channel_segments = distance_x * distance_y; double expected_per_x_segment_wl = expected_x_wl / total_channel_segments; double expected_per_y_segment_wl = expected_y_wl / total_channel_segments; From 15f76fb59799973e611db3679df0b687f4ac446e Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 16 Apr 2025 19:25:05 -0400 Subject: [PATCH 03/66] write post-placement estimate of routing chann util --- vpr/src/base/stats.cpp | 19 +------------------ vpr/src/base/stats.h | 18 ++++++++++++++++++ vpr/src/place/net_cost_handler.cpp | 28 ++++++++++++++++++++++++++++ vpr/src/place/placer.cpp | 2 ++ 4 files changed, 49 insertions(+), 18 
deletions(-) diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp index 774235bf2cc..d76201143bf 100644 --- a/vpr/src/base/stats.cpp +++ b/vpr/src/base/stats.cpp @@ -31,23 +31,6 @@ static void load_channel_occupancies(const Netlist<>& net_list, vtr::Matrix& chanx_occ, vtr::Matrix& chany_occ); -/** - * @brief Writes channel occupancy data to a file. - * - * Each row contains: - * - (x, y) coordinate - * - Occupancy count - * - Occupancy percentage (occupancy / capacity) - * - Channel capacity - * - * @param filename Output file path. - * @param occupancy Matrix of occupancy counts. - * @param capacity_list List of channel capacities (per y for chanx, per x for chany). - */ -static void write_channel_occupancy_table(const std::string_view filename, - const vtr::Matrix& occupancy, - const std::vector& capacity_list); - /** * @brief Figures out maximum, minimum and average number of bends * and net length in the routing. @@ -249,7 +232,7 @@ static void get_channel_occupancy_stats(const Netlist<>& net_list, bool /***/) { VTR_LOG("\n"); } -static void write_channel_occupancy_table(const std::string_view filename, +void write_channel_occupancy_table(const std::string_view filename, const vtr::Matrix& occupancy, const std::vector& capacity_list) { constexpr int w_coord = 6; diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h index 5f9e50e0700..4f7a3017c5f 100644 --- a/vpr/src/base/stats.h +++ b/vpr/src/base/stats.h @@ -2,6 +2,7 @@ #include #include #include +#include #include "vpr_types.h" #include "netlist.h" @@ -47,3 +48,20 @@ void print_resource_usage(); * @param target_device_utilization The target device utilization set by the user */ void print_device_utilization(const float target_device_utilization); + +/** + * @brief Writes channel occupancy data to a file. + * + * Each row contains: + * - (x, y) coordinate + * - Occupancy count + * - Occupancy percentage (occupancy / capacity) + * - Channel capacity + * + * @param filename Output file path. 
+ * @param occupancy Matrix of occupancy counts. + * @param capacity_list List of channel capacities (per y for chanx, per x for chany). + */ +void write_channel_occupancy_table(const std::string_view filename, + const vtr::Matrix& occupancy, + const std::vector& capacity_list); \ No newline at end of file diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index ae717609cf9..1cfabab601e 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -35,6 +35,7 @@ #include "vtr_ndmatrix.h" #include "PlacerCriticalities.h" #include "vtr_prefix_sum.h" +#include "stats.h" #include @@ -1679,6 +1680,33 @@ void NetCostHandler::estimate_routing_chann_util() const { } } } + + auto chanx_occ_int = vtr::Matrix({{ + device_ctx.grid.width(), + device_ctx.grid.height() - 1 + }}, + 0); + + auto chany_occ_int = vtr::Matrix({{ + device_ctx.grid.width() - 1, + device_ctx.grid.height() + }}, + 0); + + for (size_t x = 0; x < chanx_occ.dim_size(0); ++x) { + for (size_t y = 0; y < chanx_occ.dim_size(1); ++y) { + chanx_occ_int[x][y] = static_cast(std::round(chanx_occ[x][y])); + } + } + + for (size_t x = 0; x < chany_occ.dim_size(0); ++x) { + for (size_t y = 0; y < chany_occ.dim_size(1); ++y) { + chany_occ_int[x][y] = static_cast(std::round(chany_occ[x][y])); + } + } + + write_channel_occupancy_table("place_chanx_occupancy.txt", chanx_occ_int, device_ctx.chan_width.x_list); + write_channel_occupancy_table("place_chany_occupancy.txt", chany_occ_int, device_ctx.chan_width.y_list); } void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 52f68a442e2..b7c689d074c 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -384,6 +384,8 @@ void Placer::place() { check_place_(); log_printer_.print_post_placement_stats(); + + net_cost_handler_.estimate_routing_chann_util(); } void Placer::copy_locs_to_global_state(PlacementContext& 
place_ctx) { From 45a5c0c84dff546489d3aa5fdc6ff8a070b8c56c Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 18 Apr 2025 16:49:32 -0400 Subject: [PATCH 04/66] pass the vector by reference to PrefixSum1D constructor --- libs/libvtrutil/src/vtr_prefix_sum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libs/libvtrutil/src/vtr_prefix_sum.h b/libs/libvtrutil/src/vtr_prefix_sum.h index 31635904f1b..ef716e353cc 100644 --- a/libs/libvtrutil/src/vtr_prefix_sum.h +++ b/libs/libvtrutil/src/vtr_prefix_sum.h @@ -93,7 +93,7 @@ class PrefixSum1D { /** * @brief Construct the 1D prefix sum from a vector. */ - PrefixSum1D(std::vector vals, T zero = T()) + PrefixSum1D(const std::vector& vals, T zero = T()) : PrefixSum1D( vals.size(), [&](size_t x) noexcept { From 6b563c0a203ce1b7f04a465d4de143d171721bc0 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 18 Apr 2025 17:36:06 -0400 Subject: [PATCH 05/66] add acc_chanx_util_ and acc_chany_util_ to NetCostHandler --- vpr/src/place/net_cost_handler.cpp | 11 +++++++---- vpr/src/place/net_cost_handler.h | 4 ++++ 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index c775e674354..5bb1e3c81dd 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1527,8 +1527,8 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel* t_pl_blocks_to_be_moved& blocks_affected, double& bb_delta_c, double& timing_delta_c) { - VTR_ASSERT_SAFE(bb_delta_c == 0.); - VTR_ASSERT_SAFE(timing_delta_c == 0.); + VTR_ASSERT_DEBUG(bb_delta_c == 0.); + VTR_ASSERT_DEBUG(timing_delta_c == 0.); auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; ts_nets_to_update_.resize(0); @@ -1641,7 +1641,7 @@ double NetCostHandler::get_total_wirelength_estimate() const { return estimated_wirelength; } -void NetCostHandler::estimate_routing_chann_util() const { +void NetCostHandler::estimate_routing_chann_util() 
{ const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& place_move_ctx = placer_state_.move(); const auto& device_ctx = g_vpr_ctx.device(); @@ -1661,7 +1661,7 @@ void NetCostHandler::estimate_routing_chann_util() const { for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { const t_bb& bb = place_move_ctx.bb_coords[net_id]; - double expected_wirelength = get_net_wirelength_estimate(net_id, bb); + double expected_wirelength = get_net_wirelength_estimate_(net_id); int distance_x = bb.xmax - bb.xmin + 1; int distance_y = bb.ymax - bb.ymin + 1; @@ -1682,6 +1682,9 @@ void NetCostHandler::estimate_routing_chann_util() const { } } + acc_chanx_util_ = vtr::PrefixSum2D(chanx_occ); + acc_chany_util_ = vtr::PrefixSum2D(chanx_occ); + auto chanx_occ_int = vtr::Matrix({{ device_ctx.grid.width(), device_ctx.grid.height() - 1 diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 1e557ae56a5..57bfc7ce8cb 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -205,6 +205,10 @@ class NetCostHandler { vtr::PrefixSum1D acc_chanx_width_; // [0..device_ctx.grid.width()-1] vtr::PrefixSum1D acc_chany_width_; // [0..device_ctx.grid.height()-1] + vtr::PrefixSum2D acc_chanx_util_; + vtr::PrefixSum2D acc_chany_util_; + + /** * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in * the cross-die-layer direction over a 2D (x,y) region. 
We don't assume the inter-die connectivity is the same at all (x,y) locations, so we From 9157961a6f53cb413bc7c60d8c24b61d6baa2dbf Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Apr 2025 19:22:23 -0400 Subject: [PATCH 06/66] compute chan utilization ratio instead of occupancy --- vpr/src/place/net_cost_handler.cpp | 42 +++++++++++++++++++----------- vpr/src/place/net_cost_handler.h | 21 ++++++++++----- 2 files changed, 42 insertions(+), 21 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 85cd3ae80ca..7378623a9b9 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -109,6 +109,9 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, if (cube_bb_) { ts_bb_edge_new_.resize(num_nets, t_bb()); ts_bb_coord_new_.resize(num_nets, t_bb()); + + ts_net_avg_chann_util_new_.resize(num_nets); + bb_coords_.resize(num_nets, t_bb()); bb_num_on_edges_.resize(num_nets, t_bb()); comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cost_, this, std::placeholders::_1); @@ -533,6 +536,12 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts num_sink_pin_layer[pin_loc.layer_num]++; } + + // the average channel utilization that is going to be updated by this function + auto& [x_chan_util, y_chan_util] = use_ts ? 
ts_net_avg_chann_util_new_[net_id] : net_avg_chann_util_[net_id]; + const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); + x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; } void NetCostHandler::get_non_updatable_per_layer_bb_(ClusterNetId net_id, bool use_ts) { @@ -1636,16 +1645,15 @@ double NetCostHandler::get_total_wirelength_estimate() const { void NetCostHandler::estimate_routing_chann_util() { const auto& cluster_ctx = g_vpr_ctx.clustering(); - const auto& place_move_ctx = placer_state_.move(); const auto& device_ctx = g_vpr_ctx.device(); - auto chanx_occ = vtr::Matrix({{ + auto chanx_util = vtr::Matrix({{ device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (length of x channel) device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels) }}, 0); - auto chany_occ = vtr::Matrix({{ + auto chany_util = vtr::Matrix({{ device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels) device_ctx.grid.height() //[0 .. 
device_ctx.grid.height() - 1] (length of y channel) }}, @@ -1653,7 +1661,7 @@ void NetCostHandler::estimate_routing_chann_util() { for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - const t_bb& bb = place_move_ctx.bb_coords[net_id]; + const t_bb& bb = bb_coords_[net_id]; double expected_wirelength = get_net_wirelength_estimate_(net_id); int distance_x = bb.xmax - bb.xmin + 1; @@ -1668,16 +1676,13 @@ void NetCostHandler::estimate_routing_chann_util() { for (int x = bb.xmin; x <= bb.xmax; x++) { for (int y = bb.ymin; y <= bb.ymax; y++) { - chanx_occ[x][y] += expected_per_x_segment_wl; - chany_occ[x][y] += expected_per_y_segment_wl; + chanx_util[x][y] += expected_per_x_segment_wl; + chany_util[x][y] += expected_per_y_segment_wl; } } } } - acc_chanx_util_ = vtr::PrefixSum2D(chanx_occ); - acc_chany_util_ = vtr::PrefixSum2D(chanx_occ); - auto chanx_occ_int = vtr::Matrix({{ device_ctx.grid.width(), device_ctx.grid.height() - 1 @@ -1690,20 +1695,27 @@ void NetCostHandler::estimate_routing_chann_util() { }}, 0); - for (size_t x = 0; x < chanx_occ.dim_size(0); ++x) { - for (size_t y = 0; y < chanx_occ.dim_size(1); ++y) { - chanx_occ_int[x][y] = static_cast(std::round(chanx_occ[x][y])); + const t_chan_width& chan_width = device_ctx.chan_width; + + for (size_t x = 0; x < chanx_util.dim_size(0); ++x) { + for (size_t y = 0; y < chanx_util.dim_size(1); ++y) { + chanx_occ_int[x][y] = static_cast(std::round(chanx_util[x][y])); + chanx_util[x][y] /= chan_width.x_list[y]; } } - for (size_t x = 0; x < chany_occ.dim_size(0); ++x) { - for (size_t y = 0; y < chany_occ.dim_size(1); ++y) { - chany_occ_int[x][y] = static_cast(std::round(chany_occ[x][y])); + for (size_t x = 0; x < chany_util.dim_size(0); ++x) { + for (size_t y = 0; y < chany_util.dim_size(1); ++y) { + chany_occ_int[x][y] = static_cast(std::round(chany_util[x][y])); + chany_util[x][y] /= chan_width.y_list[x]; } } 
write_channel_occupancy_table("place_chanx_occupancy.txt", chanx_occ_int, device_ctx.chan_width.x_list); write_channel_occupancy_table("place_chany_occupancy.txt", chany_occ_int, device_ctx.chan_width.y_list); + + acc_chanx_util_ = vtr::PrefixSum2D(chanx_util); + acc_chany_util_ = vtr::PrefixSum2D(chany_util); } void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index fc6893e0158..524d6911091 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -126,7 +126,7 @@ class NetCostHandler { */ double get_total_wirelength_estimate() const; - void estimate_routing_chann_util() const; + void estimate_routing_chann_util(); private: ///@brief Specifies whether the bounding box is computed using cube method or per-layer method. @@ -171,19 +171,28 @@ class NetCostHandler { /* [0...num_affected_nets] -> net_id of the affected nets */ std::vector ts_nets_to_update_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates) + vtr::vector> ts_net_avg_chann_util_new_; + + /// Store the number of blocks on each of a net's bounding box (to allow efficient updates) + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector bb_num_on_edges_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the bounding box coordinates of a net's bounding box + /// Store the bounding box coordinates of a net's bounding box + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector bb_coords_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each of a net's bounding box (to allow efficient updates) + vtr::vector> net_avg_chann_util_; + + /// Store the number of blocks on each of a net's bounding box (to allow efficient updates) + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector> layer_bb_num_on_edges_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. 
Store the bounding box coordinates of a net's bounding box + /// Store the bounding box coordinates of a net's bounding box + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector> layer_bb_coords_; - // [0..cluster_ctx.clb_nlist.nets().size()-1]. Store the number of blocks on each layer () + /// Store the number of blocks on each layer () + /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::Matrix num_sink_pin_layer_; /** From 3bee75c10b1e01ba1624f065582c01575bc5def6 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Apr 2025 19:22:58 -0400 Subject: [PATCH 07/66] add NetCostHandler::get_net_cube_cong_cost_() --- vpr/src/place/net_cost_handler.cpp | 19 ++++++++++++++++++- vpr/src/place/net_cost_handler.h | 2 ++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 7378623a9b9..dbe8434c739 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -109,7 +109,6 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, if (cube_bb_) { ts_bb_edge_new_.resize(num_nets, t_bb()); ts_bb_coord_new_.resize(num_nets, t_bb()); - ts_net_avg_chann_util_new_.resize(num_nets); bb_coords_.resize(num_nets, t_bb()); @@ -849,6 +848,12 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, if (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) { bb_update_status_[net_id] = NetUpdateState::UPDATED_ONCE; } + + // the average channel utilization that is going to be updated by this function + auto& [x_chan_util, y_chan_util] = ts_net_avg_chann_util_new_[net_id]; + const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); + x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / 
total_channels; } void NetCostHandler::update_layer_bb_(ClusterNetId net_id, @@ -1376,6 +1381,17 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { return ncost; } +double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) { + auto [x_chan_cong, y_chan_cong] = use_ts ? ts_net_avg_chann_util_new_[net_id] : net_avg_chann_util_[net_id]; + + constexpr float threshold = 0.5f; + + x_chan_cong = (x_chan_cong < threshold) ? 0.0f : x_chan_cong - threshold; + y_chan_cong = (y_chan_cong < threshold) ? 0.0f : y_chan_cong - threshold; + + return x_chan_cong + y_chan_cong; +} + double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) { // Per-layer bounding box of the net const std::vector& bb = use_ts ? layer_ts_bb_coord_new_[net_id] : layer_bb_coords_[net_id]; @@ -1721,6 +1737,7 @@ void NetCostHandler::estimate_routing_chann_util() { void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { if (cube_bb_) { bb_coords_[net_id] = ts_bb_coord_new_[net_id]; + net_avg_chann_util_[net_id] = ts_net_avg_chann_util_new_[net_id]; } else { layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id]; } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 524d6911091..2a2e4e804d3 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -528,6 +528,8 @@ class NetCostHandler { */ double get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts); + double get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts); + /** * @brief Given the per-layer BB, calculate the wire-length cost of the net on each layer * and return the sum of the costs From d0908b9533f5a3fea4f012eb560313b443065d5e Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sat, 19 Apr 2025 19:28:09 -0400 Subject: [PATCH 08/66] rename ts_net_avg_chann_util_new_ and net_avg_chann_util_ --- vpr/src/place/net_cost_handler.cpp | 10 +++++----- vpr/src/place/net_cost_handler.h | 4 ++-- 2 
files changed, 7 insertions(+), 7 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index dbe8434c739..80cc1acb1d7 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -109,7 +109,7 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, if (cube_bb_) { ts_bb_edge_new_.resize(num_nets, t_bb()); ts_bb_coord_new_.resize(num_nets, t_bb()); - ts_net_avg_chann_util_new_.resize(num_nets); + ts_avg_chann_util_new_.resize(num_nets); bb_coords_.resize(num_nets, t_bb()); bb_num_on_edges_.resize(num_nets, t_bb()); @@ -537,7 +537,7 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts } // the average channel utilization that is going to be updated by this function - auto& [x_chan_util, y_chan_util] = use_ts ? ts_net_avg_chann_util_new_[net_id] : net_avg_chann_util_[net_id]; + auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; @@ -850,7 +850,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, } // the average channel utilization that is going to be updated by this function - auto& [x_chan_util, y_chan_util] = ts_net_avg_chann_util_new_[net_id]; + auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id]; const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, 
bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; @@ -1382,7 +1382,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { } double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) { - auto [x_chan_cong, y_chan_cong] = use_ts ? ts_net_avg_chann_util_new_[net_id] : net_avg_chann_util_[net_id]; + auto [x_chan_cong, y_chan_cong] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; constexpr float threshold = 0.5f; @@ -1737,7 +1737,7 @@ void NetCostHandler::estimate_routing_chann_util() { void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { if (cube_bb_) { bb_coords_[net_id] = ts_bb_coord_new_[net_id]; - net_avg_chann_util_[net_id] = ts_net_avg_chann_util_new_[net_id]; + avg_chann_util_[net_id] = ts_avg_chann_util_new_[net_id]; } else { layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id]; } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 2a2e4e804d3..bf955400684 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -171,7 +171,7 @@ class NetCostHandler { /* [0...num_affected_nets] -> net_id of the affected nets */ std::vector ts_nets_to_update_; - vtr::vector> ts_net_avg_chann_util_new_; + vtr::vector> ts_avg_chann_util_new_; /// Store the number of blocks on each of a net's bounding box (to allow efficient updates) /// [0..cluster_ctx.clb_nlist.nets().size()-1] @@ -181,7 +181,7 @@ class NetCostHandler { /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector bb_coords_; - vtr::vector> net_avg_chann_util_; + vtr::vector> avg_chann_util_; /// Store the number of blocks on each of a net's bounding box (to allow efficient updates) /// [0..cluster_ctx.clb_nlist.nets().size()-1] From 3a2afd3debd6ce0db8e94762c3cc69a6cff013d6 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 20 Apr 2025 17:18:37 -0400 Subject: [PATCH 09/66] update per net average chan util in 
get_bb_from_scratch_() --- vpr/src/place/net_cost_handler.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index ffa0977af2c..551370770d6 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -112,6 +112,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, ts_avg_chann_util_new_.resize(num_nets); bb_coords_.resize(num_nets, t_bb()); + avg_chann_util_.resize(num_nets); + bb_num_on_edges_.resize(num_nets, t_bb()); comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cost_, this, std::placeholders::_1); update_bb_functor_ = std::bind(&NetCostHandler::update_bb_, this, std::placeholders::_1, std::placeholders::_2, @@ -584,8 +586,6 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, t_physical_tile_loc pin_new_loc, bool src_pin) { //TODO: account for multiple physical pin instances per logical pin - const t_bb *curr_bb_edge, *curr_bb_coord; - const auto& device_ctx = g_vpr_ctx.device(); const int num_layers = device_ctx.grid.get_num_layers(); @@ -605,6 +605,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, vtr::NdMatrixProxy curr_num_sink_pin_layer = (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) ? num_sink_pin_layer_[size_t(net_id)] : num_sink_pin_layer_new; + const t_bb *curr_bb_edge, *curr_bb_coord; if (bb_update_status_[net_id] == NetUpdateState::NOT_UPDATED_YET) { /* The net had NOT been updated before, could use the old values */ curr_bb_edge = &bb_num_on_edges_[net_id]; @@ -1285,6 +1286,12 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) { num_on_edges.ymax = ymax_edge; num_on_edges.layer_min = layer_min_edge; num_on_edges.layer_max = layer_max_edge; + + // the average channel utilization that is going to be updated by this function + auto& [x_chan_util, y_chan_util] = use_ts ? 
ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; + const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1); + x_chan_util = acc_chanx_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; + y_chan_util = acc_chany_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; } void NetCostHandler::get_layer_bb_from_scratch_(ClusterNetId net_id, From 758eb86fc52ccc81cf01d04d4aeea569129092f8 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Sun, 20 Apr 2025 17:42:39 -0400 Subject: [PATCH 10/66] add net_cong_cost_ and proposed_net_cong_cost_ --- vpr/src/place/net_cost_handler.cpp | 19 +++++++++++++------ vpr/src/place/net_cost_handler.h | 9 +++++++-- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 551370770d6..fe569b6f892 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -141,6 +141,8 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, // negative net costs mean the cost is not valid. net_cost_.resize(num_nets, -1.); proposed_net_cost_.resize(num_nets, -1.); + net_cong_cost_.resize(num_nets, -1.); + proposed_net_cong_cost_.resize(num_nets, -1.); /* Used to store costs for moves not yet made and to indicate when a net's * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't @@ -433,7 +435,6 @@ void NetCostHandler::update_td_delta_costs_(const PlaceDelayModel* delay_model, } } -///@brief Record effected nets. void NetCostHandler::record_affected_net_(const ClusterNetId net) { /* Record effected nets. */ if (proposed_net_cost_[net] < 0.) 
{ @@ -1536,13 +1537,15 @@ static double wirelength_crossing_count(size_t fanout) { } } -void NetCostHandler::set_bb_delta_cost_(double& bb_delta_c) { +void NetCostHandler::set_bb_delta_cost_(double& bb_delta_c, double& congestion_delta_c) { for (const ClusterNetId ts_net : ts_nets_to_update_) { ClusterNetId net_id = ts_net; proposed_net_cost_[net_id] = get_net_bb_cost_functor_(net_id); + proposed_net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/true); bb_delta_c += proposed_net_cost_[net_id] - net_cost_[net_id]; + congestion_delta_c += proposed_net_cong_cost_[net_id] - net_cong_cost_[net_id]; } } @@ -1550,10 +1553,11 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel* const PlacerCriticalities* criticalities, t_pl_blocks_to_be_moved& blocks_affected, double& bb_delta_c, - double& timing_delta_c) { + double& timing_delta_c, + double& congestion_delta_c) { VTR_ASSERT_DEBUG(bb_delta_c == 0.); VTR_ASSERT_DEBUG(timing_delta_c == 0.); - auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; + const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; ts_nets_to_update_.resize(0); @@ -1581,12 +1585,12 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel* /* Now update the bounding box costs (since the net bounding * * boxes are up-to-date). The cost is only updated once per net. */ - set_bb_delta_cost_(bb_delta_c); + set_bb_delta_cost_(bb_delta_c, congestion_delta_c); } void NetCostHandler::update_move_nets() { /* update net cost functions and reset flags. */ - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); for (const ClusterNetId ts_net : ts_nets_to_update_) { ClusterNetId net_id = ts_net; @@ -1602,9 +1606,11 @@ void NetCostHandler::update_move_nets() { } net_cost_[net_id] = proposed_net_cost_[net_id]; + net_cong_cost_[net_id] = proposed_net_cong_cost_[net_id]; /* negative proposed_net_cost value is acting as a flag to mean not computed yet. 
*/ proposed_net_cost_[net_id] = -1; + proposed_net_cong_cost_[net_id] = -1; bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET; } } @@ -1614,6 +1620,7 @@ void NetCostHandler::reset_move_nets() { for (const ClusterNetId ts_net : ts_nets_to_update_) { ClusterNetId net_id = ts_net; proposed_net_cost_[net_id] = -1; + proposed_net_cong_cost_[net_id] = -1; bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET; } } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 2546a5fb1eb..733267bba9c 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -91,7 +91,8 @@ class NetCostHandler { const PlacerCriticalities* criticalities, t_pl_blocks_to_be_moved& blocks_affected, double& bb_delta_c, - double& timing_delta_c); + double& timing_delta_c, + double& congestion_delta_c); /** * @brief Reset the net cost function flags (proposed_net_cost and bb_updated_before) @@ -215,6 +216,10 @@ class NetCostHandler { */ vtr::vector net_cost_; vtr::vector proposed_net_cost_; + + vtr::vector net_cong_cost_; + vtr::vector proposed_net_cong_cost_; + vtr::vector bb_update_status_; /** @@ -278,7 +283,7 @@ class NetCostHandler { * indicated in the blocks_affected data structure. 
* @param bb_delta_c Cost difference after and before moving the block */ - void set_bb_delta_cost_(double& bb_delta_c); + void set_bb_delta_cost_(double& bb_delta_c, double& congestion_delta_c); /** * @brief Allocates and loads the chanx_place_cost_fac and chany_place_cost_fac arrays with the inverse of From a79878aa72a192da8f8b33348d05de8a8dff2d08 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 21 Apr 2025 16:56:00 -0400 Subject: [PATCH 11/66] take congestion cost into account --- vpr/src/base/SetupVPR.cpp | 1 + vpr/src/base/read_options.cpp | 10 +++++++--- vpr/src/base/read_options.h | 1 + vpr/src/base/vpr_types.h | 1 + vpr/src/place/annealer.cpp | 13 ++++++++----- vpr/src/place/place_util.h | 2 ++ 6 files changed, 20 insertions(+), 8 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 78b8f797ff5..7951eda6f7f 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -643,6 +643,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->recompute_crit_iter = Options.RecomputeCritIter; PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff; + PlacerOpts->congestion_factor = Options.place_congestion_factor; /* Depends on PlacerOpts->place_algorithm */ PlacerOpts->delay_offset = Options.place_delay_offset; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 26f9b5bb132..6b335bae0ca 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2412,12 +2412,16 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& place_timing_grp = parser.add_argument_group("timing-driven placement options"); place_timing_grp.add_argument(args.PlaceTimingTradeoff, "--timing_tradeoff") - .help( - "Trade-off control between delay and wirelength during placement." 
- " 0.0 focuses completely on wirelength, 1.0 completely on timing") + .help("Trade-off control between delay and wirelength during placement. " + "0.0 focuses completely on wirelength, 1.0 completely on timing") .default_value("0.5") .show_in(argparse::ShowIn::HELP_ONLY); + place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor") + .help("To be written") + .default_value("0.0") + .show_in(argparse::ShowIn::HELP_ONLY); + place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter") .help("Controls how many temperature updates occur between timing analysis during placement") .default_value("1") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index dd1be4b2575..889ba3f256a 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -183,6 +183,7 @@ struct t_options { /* Timing-driven placement options only */ argparse::ArgValue PlaceTimingTradeoff; + argparse::ArgValue place_congestion_factor; argparse::ArgValue RecomputeCritIter; argparse::ArgValue inner_loop_recompute_divider; argparse::ArgValue quench_recompute_divider; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 3bad2e48d3a..652ee8f8a2e 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -961,6 +961,7 @@ struct t_placer_opts { t_place_algorithm place_quench_algorithm; t_annealing_sched anneal_sched; /// Date: Mon, 21 Apr 2025 18:29:17 -0400 Subject: [PATCH 12/66] recompute congestion cost in NetCostHandler --- vpr/src/place/net_cost_handler.cpp | 26 +++++++++++++++----------- vpr/src/place/net_cost_handler.h | 2 +- 2 files changed, 16 insertions(+), 12 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index fe569b6f892..156f78d3fe7 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1511,19 +1511,21 @@ float NetCostHandler::get_chanz_cost_factor_(const t_bb& bb) { return z_cost_factor; } -double 
NetCostHandler::recompute_bb_cost_() { - double cost = 0; +std::pair NetCostHandler::recompute_bb_cong_cost_() { + const auto& cluster_ctx = g_vpr_ctx.clustering(); - auto& cluster_ctx = g_vpr_ctx.clustering(); + double bb_cost = 0.; + double cong_cost = 0.; - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ - /* Bounding boxes don't have to be recomputed; they're correct. */ - cost += net_cost_[net_id]; + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + // Bounding boxes don't have to be recomputed; they're correct. + bb_cost += net_cost_[net_id]; + cong_cost += net_cong_cost_[net_id]; } } - return cost; + return {bb_cost, cong_cost}; } static double wirelength_crossing_count(size_t fanout) { @@ -1639,9 +1641,11 @@ void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_m } }; - double new_bb_cost = recompute_bb_cost_(); + auto[new_bb_cost, new_cong_cost] = recompute_bb_cong_cost_(); check_and_print_cost(new_bb_cost, costs.bb_cost, "bb_cost"); + check_and_print_cost(new_cong_cost, costs.congestion_cost, "cong_cost"); costs.bb_cost = new_bb_cost; + costs.congestion_cost = new_cong_cost; if (placer_opts_.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; @@ -1658,8 +1662,8 @@ double NetCostHandler::get_total_wirelength_estimate() const { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; double estimated_wirelength = 0.0; - for (ClusterNetId net_id : clb_nlist.nets()) { /* for each net ... */ - if (!clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. 
*/ + for (ClusterNetId net_id : clb_nlist.nets()) { + if (!clb_nlist.net_is_ignored(net_id)) { if (cube_bb_) { estimated_wirelength += get_net_wirelength_estimate_(net_id); } else { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 733267bba9c..900f75657ea 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -517,7 +517,7 @@ class NetCostHandler { * This functions is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again. * @return Total bb (wirelength) cost for the placement */ - double recompute_bb_cost_(); + std::pair recompute_bb_cong_cost_(); /** * @brief Given the 3D BB, calculate the wire-length cost of the net From a326c981d4810a740f26cada65518ebf87fee34c Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 21 Apr 2025 18:30:13 -0400 Subject: [PATCH 13/66] update congestion cost norm and consider its contribution when computing total cost --- vpr/src/place/place_util.cpp | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 1ac0899fbdf..6056e3d15b4 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -11,13 +11,18 @@ #include "noc_place_utils.h" void t_placer_costs::update_norm_factors() { + const auto& clustered_nlist = g_vpr_ctx.clustering().clb_nlist; + + bb_cost_norm = 1 / bb_cost; + if (congestion_cost > 0.) { + congestion_cost_norm = 1 / congestion_cost; + } else { + congestion_cost_norm = 1. 
/ (double)clustered_nlist.nets().size(); + } + if (place_algorithm.is_timing_driven()) { - bb_cost_norm = 1 / bb_cost; //Prevent the norm factor from going to infinity timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST); - } else { - VTR_ASSERT_SAFE(place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); - bb_cost_norm = 1 / bb_cost; //Updating the normalization factor in bounding box mode since the cost in this mode is determined after normalizing the wirelength cost } if (noc_enabled) { @@ -36,6 +41,8 @@ double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_ total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm); } + total_cost += congestion_cost * congestion_cost_norm; + if (noc_opts.noc) { // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion total_cost += calculate_noc_cost(noc_cost_terms, noc_cost_norm_factors, noc_opts); From 0a2634e2da436ae7878e92afa2d8b5175554ff09 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 21 Apr 2025 18:34:26 -0400 Subject: [PATCH 14/66] compute average congestion cost in t_placer_statistics --- vpr/src/place/place_util.cpp | 4 ++++ vpr/src/place/place_util.h | 1 + 2 files changed, 5 insertions(+) diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 6056e3d15b4..2caead2fcbd 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -83,6 +83,7 @@ void t_placer_statistics::reset() { av_cost = 0.; av_bb_cost = 0.; av_timing_cost = 0.; + av_cong_cost = 0.; sum_of_squares = 0.; success_sum = 0; success_rate = 0.; @@ -95,6 +96,7 @@ void t_placer_statistics::single_swap_update(const t_placer_costs& costs) { av_cost += costs.cost; av_bb_cost += costs.bb_cost; av_timing_cost += costs.timing_cost; + av_cong_cost += costs.congestion_cost; sum_of_squares += (costs.cost) * (costs.cost); } @@ -104,10 +106,12 @@ void 
t_placer_statistics::calc_iteration_stats(const t_placer_costs& costs, int av_cost = costs.cost; av_bb_cost = costs.bb_cost; av_timing_cost = costs.timing_cost; + av_cong_cost = costs.congestion_cost; } else { av_cost /= success_sum; av_bb_cost /= success_sum; av_timing_cost /= success_sum; + av_cong_cost /= success_sum; } success_rate = success_sum / float(move_lim); std_dev = get_std_dev(success_sum, sum_of_squares, av_cost); diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index 3ae2f9f81f4..b8ef75f8dc7 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -179,6 +179,7 @@ class t_placer_statistics { double av_cost; double av_bb_cost; double av_timing_cost; + double av_cong_cost; double sum_of_squares; int success_sum; float success_rate; From f3ec24a76a28842da8b3e5a01a18d40e265e7d29 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 22 Apr 2025 14:08:17 -0400 Subject: [PATCH 15/66] compute congestion cost from scratch in comp_bb_cong_cost() and assign it to t_placer_costs::congestion_cost --- vpr/src/place/net_cost_handler.cpp | 51 ++++++++++++++++++++---------- vpr/src/place/net_cost_handler.h | 8 ++--- vpr/src/place/place_util.cpp | 4 +++ vpr/src/place/placer.cpp | 17 ++++------ 4 files changed, 49 insertions(+), 31 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 156f78d3fe7..04104c0f5d6 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -115,7 +115,7 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, avg_chann_util_.resize(num_nets); bb_num_on_edges_.resize(num_nets, t_bb()); - comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cost_, this, std::placeholders::_1); + comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1); update_bb_functor_ = std::bind(&NetCostHandler::update_bb_, this, std::placeholders::_1, std::placeholders::_2, 
std::placeholders::_3, std::placeholders::_4); get_net_bb_cost_functor_ = std::bind(&NetCostHandler::get_net_cube_bb_cost_, this, std::placeholders::_1, /*use_ts=*/true); @@ -125,7 +125,7 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, layer_ts_bb_coord_new_.resize(num_nets, std::vector(num_layers, t_2D_bb())); layer_bb_num_on_edges_.resize(num_nets, std::vector(num_layers, t_2D_bb())); layer_bb_coords_.resize(num_nets, std::vector(num_layers, t_2D_bb())); - comp_bb_cost_functor_ = std::bind(&NetCostHandler::comp_per_layer_bb_cost_, this, std::placeholders::_1); + comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_per_layer_bb_cost_, this, std::placeholders::_1); update_bb_functor_ = std::bind(&NetCostHandler::update_layer_bb_, this, std::placeholders::_1, std::placeholders::_2, std::placeholders::_3, std::placeholders::_4); get_net_bb_cost_functor_ = std::bind(&NetCostHandler::get_net_per_layer_bb_cost_, this, std::placeholders::_1, /*use_ts=*/true); @@ -252,20 +252,21 @@ void NetCostHandler::alloc_and_load_for_fast_vertical_cost_update_() { }); } -std::pair NetCostHandler::comp_bb_cost(e_cost_methods method) { - return comp_bb_cost_functor_(method); +std::tuple NetCostHandler::comp_bb_cong_cost(e_cost_methods method) { + return comp_bb_cong_cost_functor_(method); } -std::pair NetCostHandler::comp_cube_bb_cost_(e_cost_methods method) { +std::tuple NetCostHandler::comp_cube_bb_cong_cost_(e_cost_methods method) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - double cost = 0; - double expected_wirelength = 0.0; + double bb_cost = 0.; + double expected_wirelength = 0.; + double cong_cost = 0.; - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ - /* Small nets don't use incremental updating on their bounding boxes, * - * so they can use a fast bounding box calculator. 
*/ + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + /* Small nets don't use incremental updating on their bounding boxes, + * so they can use a fast bounding box calculator. */ if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) { get_bb_from_scratch_(net_id, /*use_ts=*/false); } else { @@ -273,21 +274,35 @@ std::pair NetCostHandler::comp_cube_bb_cost_(e_cost_methods meth } net_cost_[net_id] = get_net_cube_bb_cost_(net_id, /*use_ts=*/false); - cost += net_cost_[net_id]; + bb_cost += net_cost_[net_id]; if (method == e_cost_methods::CHECK) { expected_wirelength += get_net_wirelength_estimate_(net_id); } } } - return {cost, expected_wirelength}; + // Now that all bounding boxes are computed from scratch, we recompute the channel utilization + estimate_routing_chann_util(); + + // Compute congestion cost using recomputed bounding boxes and channel utilization map + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false); + cong_cost += net_cong_cost_[net_id]; + } + } + + + return {bb_cost, expected_wirelength, cong_cost}; } -std::pair NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods method) { +std::tuple NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods method) { const auto& cluster_ctx = g_vpr_ctx.clustering(); - double cost = 0; - double expected_wirelength = 0.0; + double cost = 0.; + double expected_wirelength = 0.; + // TODO: compute congestion cost + constexpr double cong_cost = 0.; for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. 
*/ @@ -310,7 +325,9 @@ std::pair NetCostHandler::comp_per_layer_bb_cost_(e_cost_methods } } - return {cost, expected_wirelength}; + + + return {cost, expected_wirelength, cong_cost}; } void NetCostHandler::update_net_bb_(const ClusterNetId net, diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index e94990461c8..0c76eaf6975 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -62,7 +62,7 @@ class NetCostHandler { * * @note The returned estimated wirelength is valid only when method == CHECK */ - std::pair comp_bb_cost(e_cost_methods method); + std::tuple comp_bb_cong_cost(e_cost_methods method); /** * @brief Find all the nets and pins affected by this swap and update costs. @@ -139,7 +139,7 @@ class NetCostHandler { ///@brief Contains some parameter that determine how the placement cost is computed. const t_placer_opts& placer_opts_; ///@brief Points to the proper method for computing the bounding box cost from scratch. - std::function(e_cost_methods method)> comp_bb_cost_functor_; + std::function(e_cost_methods method)> comp_bb_cong_cost_functor_; ///@brief Points to the proper method for updating the bounding box of a net. std::function update_bb_functor_; ///@brief Points to the proper method for getting the bounding box cost of a net @@ -502,7 +502,7 @@ class NetCostHandler { * * @note The returned estimated wirelength is valid only when method == CHECK */ - std::pair comp_per_layer_bb_cost_(e_cost_methods method); + std::tuple comp_per_layer_bb_cost_(e_cost_methods method); /** * @brief Computes the bounding box from scratch using 3D bounding boxes (cube mode) @@ -512,7 +512,7 @@ class NetCostHandler { * * @note The returned estimated wirelength is valid only when method == CHECK */ - std::pair comp_cube_bb_cost_(e_cost_methods method); + std::tuple comp_cube_bb_cong_cost_(e_cost_methods method); /** * @brief if "net" is not already stored as an affected net, add it in ts_nets_to_update. 
diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 2caead2fcbd..f062fb3ab05 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -14,6 +14,7 @@ void t_placer_costs::update_norm_factors() { const auto& clustered_nlist = g_vpr_ctx.clustering().clb_nlist; bb_cost_norm = 1 / bb_cost; + if (congestion_cost > 0.) { congestion_cost_norm = 1 / congestion_cost; } else { @@ -23,6 +24,9 @@ void t_placer_costs::update_norm_factors() { if (place_algorithm.is_timing_driven()) { //Prevent the norm factor from going to infinity timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST); + } else { + // Timing normalization factor is not used + timing_cost_norm = std::numeric_limits::quiet_NaN(); } if (noc_enabled) { diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 7ae8f5f384e..3e7684d2c77 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -125,19 +125,18 @@ Placer::Placer(const Netlist<>& net_list, } // Gets initial cost and loads bounding boxes. 
- costs_.bb_cost = net_cost_handler_.comp_bb_cost(e_cost_methods::NORMAL).first; - costs_.bb_cost_norm = 1 / costs_.bb_cost; + std::tie(costs_.bb_cost, std::ignore, costs_.congestion_cost) = net_cost_handler_.comp_bb_cong_cost(e_cost_methods::NORMAL); if (placer_opts.place_algorithm.is_timing_driven()) { alloc_and_init_timing_objects_(net_list, analysis_opts); } else { VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); - // Timing cost and normalization factors are not used - constexpr double INVALID_COST = std::numeric_limits::quiet_NaN(); - costs_.timing_cost = INVALID_COST; - costs_.timing_cost_norm = INVALID_COST; + // Timing cost is not used + costs_.timing_cost = std::numeric_limits::quiet_NaN();; } + costs_.update_norm_factors(); + if (noc_opts.noc) { VTR_ASSERT(noc_cost_handler_.has_value()); @@ -222,8 +221,6 @@ void Placer::alloc_and_init_timing_objects_(const Netlist<>& net_list, write_setup_timing_graph_dot(getEchoFileName(E_ECHO_INITIAL_PLACEMENT_TIMING_GRAPH) + std::string(".dot"), *timing_info_, debug_tnode); } - - costs_.timing_cost_norm = 1 / costs_.timing_cost; } void Placer::check_place_() { @@ -264,9 +261,8 @@ void Placer::check_place_() { int Placer::check_placement_costs_() { int error = 0; - double timing_cost_check; - const auto [bb_cost_check, expected_wirelength] = net_cost_handler_.comp_bb_cost(e_cost_methods::CHECK); + const auto [bb_cost_check, expected_wirelength, _] = net_cost_handler_.comp_bb_cong_cost(e_cost_methods::CHECK); if (fabs(bb_cost_check - costs_.bb_cost) > costs_.bb_cost * PL_INCREMENTAL_COST_TOLERANCE) { VTR_LOG_ERROR( @@ -276,6 +272,7 @@ int Placer::check_placement_costs_() { } if (placer_opts_.place_algorithm.is_timing_driven()) { + double timing_cost_check; comp_td_costs(place_delay_model_.get(), *placer_criticalities_, placer_state_, &timing_cost_check); if (fabs(timing_cost_check - costs_.timing_cost) > costs_.timing_cost * PL_INCREMENTAL_COST_TOLERANCE) { VTR_LOG_ERROR( From 
57b3c86974e51bec6bf47d65cd39fcec683e3729 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 23 Apr 2025 14:57:07 -0400 Subject: [PATCH 16/66] initialize acc_chanx_util_ and acc_chany_util_ in constructor --- vpr/src/place/net_cost_handler.cpp | 69 +++++++++++++----------------- vpr/src/place/net_cost_handler.h | 4 ++ vpr/src/place/placer.cpp | 2 - 3 files changed, 34 insertions(+), 41 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 04104c0f5d6..22336c552ea 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -100,7 +100,9 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, : cube_bb_(cube_bb) , placer_state_(placer_state) , placer_opts_(placer_opts) { - const int num_layers = g_vpr_ctx.device().grid.get_num_layers(); + const auto& device_ctx = g_vpr_ctx.device(); + + const int num_layers = device_ctx.grid.get_num_layers(); const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); is_multi_layer_ = num_layers > 1; @@ -150,6 +152,21 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, bb_update_status_.resize(num_nets, NetUpdateState::NOT_UPDATED_YET); alloc_and_load_chan_w_factors_for_place_cost_(); + + chanx_util_ = vtr::Matrix({{ + device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (length of x channel) + device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels) + }}, + 0); + + chany_util_ = vtr::Matrix({{ + device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels) + device_ctx.grid.height() //[0 .. 
device_ctx.grid.height() - 1] (length of y channel) + }}, + 0); + + acc_chanx_util_ = vtr::PrefixSum2D(chanx_util_); + acc_chany_util_ = vtr::PrefixSum2D(chany_util_); } void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { @@ -1696,17 +1713,8 @@ void NetCostHandler::estimate_routing_chann_util() { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); - auto chanx_util = vtr::Matrix({{ - device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (length of x channel) - device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels) - }}, - 0); - - auto chany_util = vtr::Matrix({{ - device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels) - device_ctx.grid.height() //[0 .. device_ctx.grid.height() - 1] (length of y channel) - }}, - 0); + chanx_util_.fill(0.); + chany_util_.fill(0.); for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { @@ -1725,46 +1733,29 @@ void NetCostHandler::estimate_routing_chann_util() { for (int x = bb.xmin; x <= bb.xmax; x++) { for (int y = bb.ymin; y <= bb.ymax; y++) { - chanx_util[x][y] += expected_per_x_segment_wl; - chany_util[x][y] += expected_per_y_segment_wl; + chanx_util_[x][y] += expected_per_x_segment_wl; + chany_util_[x][y] += expected_per_y_segment_wl; } } } } - auto chanx_occ_int = vtr::Matrix({{ - device_ctx.grid.width(), - device_ctx.grid.height() - 1 - }}, - 0); - - auto chany_occ_int = vtr::Matrix({{ - device_ctx.grid.width() - 1, - device_ctx.grid.height() - }}, - 0); - const t_chan_width& chan_width = device_ctx.chan_width; - for (size_t x = 0; x < chanx_util.dim_size(0); ++x) { - for (size_t y = 0; y < chanx_util.dim_size(1); ++y) { - chanx_occ_int[x][y] = static_cast(std::round(chanx_util[x][y])); - chanx_util[x][y] /= chan_width.x_list[y]; + for (size_t x = 0; x < chanx_util_.dim_size(0); ++x) { + for (size_t y = 0; y < chanx_util_.dim_size(1); ++y) { + 
chanx_util_[x][y] /= chan_width.x_list[y]; } } - for (size_t x = 0; x < chany_util.dim_size(0); ++x) { - for (size_t y = 0; y < chany_util.dim_size(1); ++y) { - chany_occ_int[x][y] = static_cast(std::round(chany_util[x][y])); - chany_util[x][y] /= chan_width.y_list[x]; + for (size_t x = 0; x < chany_util_.dim_size(0); ++x) { + for (size_t y = 0; y < chany_util_.dim_size(1); ++y) { + chany_util_[x][y] /= chan_width.y_list[x]; } } - write_channel_occupancy_table("place_chanx_occupancy.txt", chanx_occ_int, device_ctx.chan_width.x_list); - write_channel_occupancy_table("place_chany_occupancy.txt", chany_occ_int, device_ctx.chan_width.y_list); - - acc_chanx_util_ = vtr::PrefixSum2D(chanx_util); - acc_chany_util_ = vtr::PrefixSum2D(chany_util); + acc_chanx_util_ = vtr::PrefixSum2D(chanx_util_); + acc_chany_util_ = vtr::PrefixSum2D(chany_util_); } void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 0c76eaf6975..38d5dbb0d72 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -246,6 +246,10 @@ class NetCostHandler { vtr::PrefixSum2D acc_chany_util_; + vtr::Matrix chanx_util_; + vtr::Matrix chany_util_; + + /** * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in * the cross-die-layer direction over a 2D (x,y) region. 
We don't assume the inter-die connectivity is the same at all (x,y) locations, so we diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 3e7684d2c77..a96ac02c2a1 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -386,8 +386,6 @@ void Placer::place() { check_place_(); log_printer_.print_post_placement_stats(); - - net_cost_handler_.estimate_routing_chann_util(); } void Placer::copy_locs_to_global_state(PlacementContext& place_ctx) { From 01d917e30e7119e41d7c44ee98ea75543a2c21df Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 23 Apr 2025 15:17:13 -0400 Subject: [PATCH 17/66] add --congestion_acceptance_rate_trigger commmand line option and enable congestion modeling when acceptance rate drops below this value --- vpr/src/base/SetupVPR.cpp | 1 + vpr/src/base/read_options.cpp | 5 +++++ vpr/src/base/read_options.h | 2 ++ vpr/src/base/vpr_types.h | 3 ++- vpr/src/place/annealer.cpp | 8 +++++++- vpr/src/place/annealer.h | 2 ++ vpr/src/place/net_cost_handler.cpp | 1 + 7 files changed, 20 insertions(+), 2 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index 17a9025497c..ad4d48687d5 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -645,6 +645,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff; PlacerOpts->congestion_factor = Options.place_congestion_factor; + PlacerOpts->congestion_acceptance_rate_trigger = Options.place_congestion_acceptance_rate_trigger; /* Depends on PlacerOpts->place_algorithm */ PlacerOpts->delay_offset = Options.place_delay_offset; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 7dd74d07517..026d7967bbf 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2429,6 +2429,11 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("0.0") 
.show_in(argparse::ShowIn::HELP_ONLY); + place_timing_grp.add_argument(args.place_congestion_acceptance_rate_trigger, "--congestion_acceptance_rate_trigger") + .help("To be written") + .default_value("0.0") + .show_in(argparse::ShowIn::HELP_ONLY); + place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter") .help("Controls how many temperature updates occur between timing analysis during placement") .default_value("1") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 276d4efc9b0..12e89e382ef 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -185,6 +185,8 @@ struct t_options { /* Timing-driven placement options only */ argparse::ArgValue PlaceTimingTradeoff; argparse::ArgValue place_congestion_factor; + argparse::ArgValue place_congestion_acceptance_rate_trigger; + argparse::ArgValue RecomputeCritIter; argparse::ArgValue inner_loop_recompute_divider; argparse::ArgValue quench_recompute_divider; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 0b3762d3ca8..ee94f8dada3 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -962,6 +962,7 @@ struct t_placer_opts { t_annealing_sched anneal_sched; ///(grid_width, [&](size_t x) noexcept { int chan_y_width = device_ctx.chan_width.y_list[x]; From e5a51b4162682a9631672e23725a4542844de430 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 23 Apr 2025 15:51:40 -0400 Subject: [PATCH 18/66] added --congestion_chan_util_threshold command line option --- vpr/src/base/SetupVPR.cpp | 1 + vpr/src/base/read_options.cpp | 5 +++++ vpr/src/base/read_options.h | 1 + vpr/src/base/vpr_types.h | 1 + vpr/src/place/net_cost_handler.cpp | 4 ++-- 5 files changed, 10 insertions(+), 2 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index ad4d48687d5..06484a41a6c 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -646,6 +646,7 @@ static void SetupPlacerOpts(const t_options& 
Options, t_placer_opts* PlacerOpts) PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff; PlacerOpts->congestion_factor = Options.place_congestion_factor; PlacerOpts->congestion_acceptance_rate_trigger = Options.place_congestion_acceptance_rate_trigger; + PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold; /* Depends on PlacerOpts->place_algorithm */ PlacerOpts->delay_offset = Options.place_delay_offset; diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 026d7967bbf..646e419f1af 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2434,6 +2434,11 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("0.0") .show_in(argparse::ShowIn::HELP_ONLY); + place_timing_grp.add_argument(args.place_congestion_chan_util_threshold, "--congestion_chan_util_threshold") + .help("To be written") + .default_value("1.0") + .show_in(argparse::ShowIn::HELP_ONLY); + place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter") .help("Controls how many temperature updates occur between timing analysis during placement") .default_value("1") diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 12e89e382ef..4a3a8b0c05b 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -186,6 +186,7 @@ struct t_options { argparse::ArgValue PlaceTimingTradeoff; argparse::ArgValue place_congestion_factor; argparse::ArgValue place_congestion_acceptance_rate_trigger; + argparse::ArgValue place_congestion_chan_util_threshold; argparse::ArgValue RecomputeCritIter; argparse::ArgValue inner_loop_recompute_divider; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index ee94f8dada3..b1b03bdbe5c 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -963,6 +963,7 @@ struct t_placer_opts { float timing_tradeoff; float congestion_factor; float 
congestion_acceptance_rate_trigger; + float congestion_chan_util_threshold; int place_chan_width; enum e_pad_loc_type pad_loc_type; std::string constraints_file; diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index a54a38ffb93..04c04a503ca 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1390,7 +1390,7 @@ void NetCostHandler::get_layer_bb_from_scratch_(ClusterNetId net_id, double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { // Finds the cost due to one net by looking at its coordinate bounding box. - auto& cluster_ctx = g_vpr_ctx.clustering(); + const auto& cluster_ctx = g_vpr_ctx.clustering(); const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id]; @@ -1425,7 +1425,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) { auto [x_chan_cong, y_chan_cong] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; - constexpr float threshold = 0.5f; + const float threshold = placer_opts_.congestion_chan_util_threshold; x_chan_cong = (x_chan_cong < threshold) ? 0.0f : x_chan_cong - threshold; y_chan_cong = (y_chan_cong < threshold) ? 
0.0f : y_chan_cong - threshold; From cd6215a55f2e8378e4949f565a39f17e782c5d1c Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 24 Apr 2025 11:13:18 -0400 Subject: [PATCH 19/66] make find_subtile_in_location() definition static --- vpr/src/place/initial_placement.cpp | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/initial_placement.cpp b/vpr/src/place/initial_placement.cpp index aac91e0fd65..21ebdfa2570 100644 --- a/vpr/src/place/initial_placement.cpp +++ b/vpr/src/place/initial_placement.cpp @@ -366,11 +366,11 @@ static bool is_loc_legal(const t_pl_loc& loc, return legal; } -bool find_subtile_in_location(t_pl_loc& centroid, - t_logical_block_type_ptr block_type, - const BlkLocRegistry& blk_loc_registry, - const PartitionRegion& pr, - vtr::RngContainer& rng) { +static bool find_subtile_in_location(t_pl_loc& centroid, + t_logical_block_type_ptr block_type, + const BlkLocRegistry& blk_loc_registry, + const PartitionRegion& pr, + vtr::RngContainer& rng) { //check if the location is on chip and legal, if yes try to update subtile if (is_loc_on_chip({centroid.x, centroid.y, centroid.layer}) && is_loc_legal(centroid, pr, block_type)) { //find the compatible subtiles From 8b395aaea29cb07cb877ebde66c35bca875d50b7 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 24 Apr 2025 11:18:01 -0400 Subject: [PATCH 20/66] include the last column and row in chan?_util_ --- vpr/src/place/net_cost_handler.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 04c04a503ca..7e8604477dc 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -111,10 +111,10 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, if (cube_bb_) { ts_bb_edge_new_.resize(num_nets, t_bb()); ts_bb_coord_new_.resize(num_nets, t_bb()); - ts_avg_chann_util_new_.resize(num_nets); + 
ts_avg_chann_util_new_.resize(num_nets, {0., 0.}); bb_coords_.resize(num_nets, t_bb()); - avg_chann_util_.resize(num_nets); + avg_chann_util_.resize(num_nets, {0., 0.}); bb_num_on_edges_.resize(num_nets, t_bb()); comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1); @@ -155,12 +155,12 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, chanx_util_ = vtr::Matrix({{ device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (length of x channel) - device_ctx.grid.height() - 1 //[0 .. device_ctx.grid.height() - 2] (# x channels) + device_ctx.grid.height() //[0 .. device_ctx.grid.height() - 1] (# x channels) }}, 0); chany_util_ = vtr::Matrix({{ - device_ctx.grid.width() - 1, //[0 .. device_ctx.grid.width() - 2] (# y channels) + device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (# y channels) device_ctx.grid.height() //[0 .. device_ctx.grid.height() - 1] (length of y channel) }}, 0); From a41a256e0e371c0e4d065d653bb4826f85f9cd64 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 24 Apr 2025 12:21:33 -0400 Subject: [PATCH 21/66] recompute congestion cost when estimating channel utilization --- vpr/src/place/annealer.cpp | 6 ++++++ vpr/src/place/net_cost_handler.cpp | 19 +++++++++++++++---- vpr/src/place/net_cost_handler.h | 2 +- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 90d253790bb..0d0abd63781 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -671,6 +671,12 @@ void PlacementAnnealer::outer_loop_update_timing_info() { outer_crit_iter_count_++; } + if (congestion_modeling_started_ + || (placer_stats_.success_rate < placer_opts_.congestion_acceptance_rate_trigger && placer_stats_.av_cost != 0.)) { + costs_.congestion_cost = net_cost_handler_.estimate_routing_chann_util(); + congestion_modeling_started_ = true; + } + // Update the cost normalization factors 
costs_.update_norm_factors(); diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 7e8604477dc..751a7e48b56 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -300,7 +300,7 @@ std::tuple NetCostHandler::comp_cube_bb_cong_cost_(e_cos } // Now that all bounding boxes are computed from scratch, we recompute the channel utilization - estimate_routing_chann_util(); +// estimate_routing_chann_util(); // Compute congestion cost using recomputed bounding boxes and channel utilization map for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { @@ -1564,8 +1564,8 @@ std::pair NetCostHandler::recompute_bb_cong_cost_() { } static double wirelength_crossing_count(size_t fanout) { - /* Get the expected "crossing count" of a net, based on its number * - * of pins. Extrapolate for very large nets. */ + /* Get the expected "crossing count" of a net, based on its number + * of pins. Extrapolate for very large nets. */ if (fanout > MAX_FANOUT_CROSSING_COUNT) { return 2.7933 + 0.02616 * (fanout - MAX_FANOUT_CROSSING_COUNT); @@ -1710,7 +1710,7 @@ double NetCostHandler::get_total_wirelength_estimate() const { return estimated_wirelength; } -void NetCostHandler::estimate_routing_chann_util() { +double NetCostHandler::estimate_routing_chann_util() { const auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& device_ctx = g_vpr_ctx.device(); @@ -1757,6 +1757,17 @@ void NetCostHandler::estimate_routing_chann_util() { acc_chanx_util_ = vtr::PrefixSum2D(chanx_util_); acc_chany_util_ = vtr::PrefixSum2D(chany_util_); + + double cong_cost = 0.; + // Compute congestion cost using recomputed bounding boxes and channel utilization map + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false); + cong_cost += net_cong_cost_[net_id]; + } + } + + return cong_cost; } void 
NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 38d5dbb0d72..223e3d87d5a 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -127,7 +127,7 @@ class NetCostHandler { */ double get_total_wirelength_estimate() const; - void estimate_routing_chann_util(); + double estimate_routing_chann_util(); private: ///@brief Specifies whether the bounding box is computed using cube method or per-layer method. From 7f6c4962b6ee31dcd70baccf31477d1b443c7476 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 1 May 2025 12:24:33 -0400 Subject: [PATCH 22/66] weigh congestion cost along x/y axis with the length of bb along that dimension --- vpr/src/place/net_cost_handler.cpp | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 751a7e48b56..cdd220bc61c 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1423,14 +1423,19 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { } double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) { - auto [x_chan_cong, y_chan_cong] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; + const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; + + const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id]; + + int distance_x = bb.xmax - bb.xmin + 1; + int distance_y = bb.ymax - bb.ymin + 1; const float threshold = placer_opts_.congestion_chan_util_threshold; - x_chan_cong = (x_chan_cong < threshold) ? 0.0f : x_chan_cong - threshold; - y_chan_cong = (y_chan_cong < threshold) ? 0.0f : y_chan_cong - threshold; + float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold; + float y_chan_cong = (y_chan_util < threshold) ? 
0.0f : y_chan_util - threshold; - return x_chan_cong + y_chan_cong; + return (distance_x * x_chan_cong) + (distance_y * y_chan_cong); } double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) { From 5f11b9b7d3d79ac0ee25eb8d970998ac52d79bdc Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 16 May 2025 15:00:44 -0400 Subject: [PATCH 23/66] cherrypick calculate_channel_width(); --- vpr/src/base/stats.cpp | 35 +++++++++++++++++++++++++++++++++++ vpr/src/base/stats.h | 2 ++ 2 files changed, 37 insertions(+) diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp index 041551f885d..f9eb33e31ce 100644 --- a/vpr/src/base/stats.cpp +++ b/vpr/src/base/stats.cpp @@ -113,6 +113,41 @@ void routing_stats(const Netlist<>& net_list, } } +std::pair, vtr::NdMatrix> calculate_channel_width() { + const auto& device_ctx = g_vpr_ctx.device(); + const auto& rr_graph = device_ctx.rr_graph; + + auto chanx_width = vtr::NdMatrix({{(size_t)device_ctx.grid.get_num_layers(), + device_ctx.grid.width(), + device_ctx.grid.height()}}, + 0); + + auto chany_width = vtr::NdMatrix({{(size_t)device_ctx.grid.get_num_layers(), + device_ctx.grid.width(), + device_ctx.grid.height()}}, + 0); + + for (RRNodeId node_id : rr_graph.nodes()) { + e_rr_type rr_type = rr_graph.node_type(node_id); + + if (rr_type == e_rr_type::CHANX) { + int y = rr_graph.node_ylow(node_id); + int layer = rr_graph.node_layer(node_id); + for (int x = rr_graph.node_xlow(node_id); x <= rr_graph.node_xhigh(node_id); x++) { + chanx_width[layer][x][y]++; + } + } else if (rr_type == e_rr_type::CHANY) { + int x = rr_graph.node_xlow(node_id); + int layer = rr_graph.node_layer(node_id); + for (int y = rr_graph.node_ylow(node_id); y <= rr_graph.node_yhigh(node_id); y++) { + chany_width[layer][x][y]++; + } + } + } + + return {chanx_width, chany_width}; +} + void length_and_bends_stats(const Netlist<>& net_list, bool is_flat) { int max_bends = 0; int total_bends = 0; diff --git a/vpr/src/base/stats.h 
b/vpr/src/base/stats.h index 4f7a3017c5f..48c0bd4c4e9 100644 --- a/vpr/src/base/stats.h +++ b/vpr/src/base/stats.h @@ -22,6 +22,8 @@ void routing_stats(const Netlist<>& net_list, int wire_to_ipin_switch, bool is_flat); +std::pair, vtr::NdMatrix> calculate_channel_width(); + void print_wirelen_prob_dist(bool is_flat); void print_lambda(); From b1e78f032999e1b875dde313e423b5532750f56f Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 16 May 2025 15:07:52 -0400 Subject: [PATCH 24/66] calculate channel utilization by using channel width info extracted from RR graph --- vpr/src/place/net_cost_handler.cpp | 21 ++++++++++++++++++--- vpr/src/place/net_cost_handler.h | 4 +++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index cdd220bc61c..3c0c23781f9 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1746,17 +1746,32 @@ double NetCostHandler::estimate_routing_chann_util() { } } - const t_chan_width& chan_width = device_ctx.chan_width; +// const t_chan_width& chan_width = device_ctx.chan_width; + + if (chanx_width_.empty()) { + VTR_ASSERT(chany_width_.empty()); + std::tie(chanx_width_, chany_width_) = calculate_channel_width(); + } + for (size_t x = 0; x < chanx_util_.dim_size(0); ++x) { for (size_t y = 0; y < chanx_util_.dim_size(1); ++y) { - chanx_util_[x][y] /= chan_width.x_list[y]; + if (chanx_width_[0][x][y] > 0) { + chanx_util_[x][y] /= chanx_width_[0][x][y]; + } else { + chanx_util_[x][y] = 1.; + } + } } for (size_t x = 0; x < chany_util_.dim_size(0); ++x) { for (size_t y = 0; y < chany_util_.dim_size(1); ++y) { - chany_util_[x][y] /= chan_width.y_list[x]; + if (chany_width_[0][x][y] > 0) { + chany_util_[x][y] /= chany_width_[0][x][y]; + } else { + chany_util_[x][y] = 1.; + } } } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 223e3d87d5a..3ec31724911 100644 --- 
a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -245,10 +245,12 @@ class NetCostHandler { vtr::PrefixSum2D acc_chanx_util_; vtr::PrefixSum2D acc_chany_util_; - vtr::Matrix chanx_util_; vtr::Matrix chany_util_; + vtr::NdMatrix chanx_width_; + vtr::NdMatrix chany_width_; + /** * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in From 3d8941737cf8b670656517806951507a6f8ae25c Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Fri, 23 May 2025 18:46:37 -0400 Subject: [PATCH 25/66] remove distance factor when computing congestion cost for each net --- vpr/src/place/net_cost_handler.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 3c0c23781f9..002b5d849cf 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1435,7 +1435,8 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold; float y_chan_cong = (y_chan_util < threshold) ? 
0.0f : y_chan_util - threshold; - return (distance_x * x_chan_cong) + (distance_y * y_chan_cong); +// return (distance_x * x_chan_cong) + (distance_y * y_chan_cong); + return x_chan_cong + y_chan_cong; } double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) { From 898c73d1997c429a080254c299dfedd3bababb14 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 27 May 2025 17:43:20 -0400 Subject: [PATCH 26/66] re-normalize timing-tradeoff --- vpr/src/place/annealer.cpp | 25 +++++++++++++++++-------- vpr/src/place/annealer.h | 4 +++- vpr/src/place/place_util.cpp | 4 ++-- 3 files changed, 22 insertions(+), 11 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 0d0abd63781..1938b456c79 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -227,6 +227,9 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, , congestion_modeling_started_(false) { const auto& device_ctx = g_vpr_ctx.device(); + congestion_factor_ = placer_opts_.congestion_factor; + placer_opts_.congestion_factor = 0.; + float first_crit_exponent; if (placer_opts.place_algorithm.is_timing_driven()) { first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ @@ -467,7 +470,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, placer_opts_.timing_tradeoff, timing_delta_c, costs_.timing_cost_norm); - delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm + delta_c = (1 - placer_opts_.timing_tradeoff - placer_opts_.congestion_factor) * bb_delta_c * costs_.bb_cost_norm + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm + placer_opts_.congestion_factor * congestion_delta_c * costs_.congestion_cost_norm; } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { @@ -672,9 +675,20 @@ void PlacementAnnealer::outer_loop_update_timing_info() { } if (congestion_modeling_started_ - 
|| (placer_stats_.success_rate < placer_opts_.congestion_acceptance_rate_trigger && placer_stats_.av_cost != 0.)) { + || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_acceptance_rate_trigger) { costs_.congestion_cost = net_cost_handler_.estimate_routing_chann_util(); - congestion_modeling_started_ = true; + + + if (!congestion_modeling_started_) { + VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff); + placer_opts_.congestion_factor = congestion_factor_; + placer_opts_.congestion_factor /= 1.f + congestion_factor_; +// placer_opts_.congestion_factor /= 1.f + placer_opts_.congestion_factor; + placer_opts_.timing_tradeoff /= 1.f + congestion_factor_; + VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff); + congestion_modeling_started_ = true; + } + } // Update the cost normalization factors @@ -760,11 +774,6 @@ void PlacementAnnealer::placement_inner_loop() { // Calculate the success_rate and std_dev of the costs. 
placer_stats_.calc_iteration_stats(costs_, annealing_state_.move_lim); - if (congestion_modeling_started_ || placer_stats_.success_rate < placer_opts_.congestion_acceptance_rate_trigger) { - net_cost_handler_.estimate_routing_chann_util(); - congestion_modeling_started_ = true; - } - // update the RL agent's state if (!quench_started_) { if (placer_opts_.place_algorithm.is_timing_driven() && placer_opts_.place_agent_multistate && agent_state_ == e_agent_state::EARLY_IN_THE_ANNEAL) { diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 07fd4fed3d3..1733d18d9d5 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -268,7 +268,9 @@ class PlacementAnnealer { float estimate_starting_temperature_(); private: - const t_placer_opts& placer_opts_; + t_placer_opts placer_opts_; + float congestion_factor_; + PlacerState& placer_state_; const PlaceMacros& place_macros_; /// Stores different placement cost terms diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 1f8b2afe29d..f65f00aa5a2 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -42,10 +42,10 @@ double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_ total_cost = bb_cost * bb_cost_norm; } else if (placer_opts.place_algorithm.is_timing_driven()) { // in timing mode we include both wirelength and timing costs - total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm); + total_cost = (1 - placer_opts.timing_tradeoff - placer_opts.congestion_factor) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm); } - total_cost += congestion_cost * congestion_cost_norm; + total_cost += placer_opts.congestion_factor * congestion_cost * congestion_cost_norm; if (noc_opts.noc) { // in noc mode we include noc aggregate bandwidth, noc latency, and noc congestion From 
46a089e4a2f01f54e5c07c3fa687dd9816a0be80 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 18 Jun 2025 18:11:07 -0400 Subject: [PATCH 27/66] add congestion_modeling_started_ flag to NetCostHandler --- vpr/src/place/annealer.h | 2 +- vpr/src/place/net_cost_handler.cpp | 104 +++++++++++++++++------------ vpr/src/place/net_cost_handler.h | 1 + 3 files changed, 64 insertions(+), 43 deletions(-) diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index db4d4f6bbc9..fbdccf9abef 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -329,7 +329,7 @@ class PlacementAnnealer { int tot_iter_; /// Indicates whether the annealer has entered into the quench stage bool quench_started_; - + /// Indicates whether routing congestion modeling has been started bool congestion_modeling_started_; void LOG_MOVE_STATS_HEADER(); diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 002b5d849cf..c114d309c4f 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -97,7 +97,8 @@ static double wirelength_crossing_count(size_t fanout); NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, bool cube_bb) - : cube_bb_(cube_bb) + : congestion_modeling_started_(false) + , cube_bb_(cube_bb) , placer_state_(placer_state) , placer_opts_(placer_opts) { const auto& device_ctx = g_vpr_ctx.device(); @@ -279,7 +280,6 @@ std::tuple NetCostHandler::comp_cube_bb_cong_cost_(e_cos double bb_cost = 0.; double expected_wirelength = 0.; - double cong_cost = 0.; for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { @@ -299,18 +299,17 @@ std::tuple NetCostHandler::comp_cube_bb_cong_cost_(e_cos } } - // Now that all bounding boxes are computed from scratch, we recompute the channel utilization -// estimate_routing_chann_util(); - + double cong_cost = 0.; // Compute congestion cost using recomputed bounding boxes 
and channel utilization map - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false); - cong_cost += net_cong_cost_[net_id]; + if (congestion_modeling_started_) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false); + cong_cost += net_cong_cost_[net_id]; + } } } - return {bb_cost, expected_wirelength, cong_cost}; } @@ -571,11 +570,12 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts num_sink_pin_layer[pin_loc.layer_num]++; } - // the average channel utilization that is going to be updated by this function - auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; - const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); - x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; - y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + if (congestion_modeling_started_) { + auto& [x_chan_util, y_chan_util] = use_ts ? 
ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; + const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); + x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + } } void NetCostHandler::get_non_updatable_per_layer_bb_(ClusterNetId net_id, bool use_ts) { @@ -883,11 +883,12 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, bb_update_status_[net_id] = NetUpdateState::UPDATED_ONCE; } - // the average channel utilization that is going to be updated by this function - auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id]; - const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); - x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; - y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + if (congestion_modeling_started_) { + auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id]; + const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); + x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + } } void NetCostHandler::update_layer_bb_(ClusterNetId net_id, @@ -1323,11 +1324,12 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) { num_on_edges.layer_min = layer_min_edge; num_on_edges.layer_max = layer_max_edge; - // the average channel utilization 
that is going to be updated by this function - auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; - const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1); - x_chan_util = acc_chanx_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; - y_chan_util = acc_chany_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; + if (congestion_modeling_started_) { + auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; + const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1); + x_chan_util = acc_chanx_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; + y_chan_util = acc_chany_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; + } } void NetCostHandler::get_layer_bb_from_scratch_(ClusterNetId net_id, @@ -1423,6 +1425,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { } double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) { + VTR_ASSERT_SAFE(congestion_modeling_started_); const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id]; @@ -1562,7 +1565,10 @@ std::pair NetCostHandler::recompute_bb_cong_cost_() { if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { // Bounding boxes don't have to be recomputed; they're correct. 
bb_cost += net_cost_[net_id]; - cong_cost += net_cong_cost_[net_id]; + + if (congestion_modeling_started_) { + cong_cost += net_cong_cost_[net_id]; + } } } @@ -1585,10 +1591,12 @@ void NetCostHandler::set_bb_delta_cost_(double& bb_delta_c, double& congestion_d ClusterNetId net_id = ts_net; proposed_net_cost_[net_id] = get_net_bb_cost_functor_(net_id); - proposed_net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/true); - bb_delta_c += proposed_net_cost_[net_id] - net_cost_[net_id]; - congestion_delta_c += proposed_net_cong_cost_[net_id] - net_cong_cost_[net_id]; + + if (congestion_modeling_started_) { + proposed_net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/true); + congestion_delta_c += proposed_net_cong_cost_[net_id] - net_cong_cost_[net_id]; + } } } @@ -1600,6 +1608,7 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel* double& congestion_delta_c) { VTR_ASSERT_DEBUG(bb_delta_c == 0.); VTR_ASSERT_DEBUG(timing_delta_c == 0.); + VTR_ASSERT_DEBUG(congestion_delta_c == 0.); const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; ts_nets_to_update_.resize(0); @@ -1649,21 +1658,27 @@ void NetCostHandler::update_move_nets() { } net_cost_[net_id] = proposed_net_cost_[net_id]; - net_cong_cost_[net_id] = proposed_net_cong_cost_[net_id]; - - /* negative proposed_net_cost value is acting as a flag to mean not computed yet. */ + // negative proposed_net_cost value is acting as a flag to mean not computed yet. proposed_net_cost_[net_id] = -1; - proposed_net_cong_cost_[net_id] = -1; + + if (congestion_modeling_started_) { + net_cong_cost_[net_id] = proposed_net_cong_cost_[net_id]; + proposed_net_cong_cost_[net_id] = -1; + } + bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET; } } void NetCostHandler::reset_move_nets() { - /* Reset the net cost function flags first. 
*/ - for (const ClusterNetId ts_net : ts_nets_to_update_) { - ClusterNetId net_id = ts_net; + // Reset the net cost function flags first. + for (const ClusterNetId net_id : ts_nets_to_update_) { proposed_net_cost_[net_id] = -1; - proposed_net_cong_cost_[net_id] = -1; + + if (congestion_modeling_started_) { + proposed_net_cong_cost_[net_id] = -1; + } + bb_update_status_[net_id] = NetUpdateState::NOT_UPDATED_YET; } } @@ -1682,11 +1697,16 @@ void NetCostHandler::recompute_costs_from_scratch(const PlaceDelayModel* delay_m } }; - auto[new_bb_cost, new_cong_cost] = recompute_bb_cong_cost_(); + auto [new_bb_cost, new_cong_cost] = recompute_bb_cong_cost_(); check_and_print_cost(new_bb_cost, costs.bb_cost, "bb_cost"); - check_and_print_cost(new_cong_cost, costs.congestion_cost, "cong_cost"); costs.bb_cost = new_bb_cost; - costs.congestion_cost = new_cong_cost; + + if (congestion_modeling_started_) { + check_and_print_cost(new_cong_cost, costs.congestion_cost, "cong_cost"); + costs.congestion_cost = new_cong_cost; + } else { + costs.congestion_cost = 0.; + } if (placer_opts_.place_algorithm.is_timing_driven()) { double new_timing_cost = 0.; @@ -1747,8 +1767,6 @@ double NetCostHandler::estimate_routing_chann_util() { } } -// const t_chan_width& chan_width = device_ctx.chan_width; - if (chanx_width_.empty()) { VTR_ASSERT(chany_width_.empty()); std::tie(chanx_width_, chany_width_) = calculate_channel_width(); @@ -1779,6 +1797,8 @@ double NetCostHandler::estimate_routing_chann_util() { acc_chanx_util_ = vtr::PrefixSum2D(chanx_util_); acc_chany_util_ = vtr::PrefixSum2D(chany_util_); + congestion_modeling_started_ = true; + double cong_cost = 0.; // Compute congestion cost using recomputed bounding boxes and channel utilization map for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 7e88086b3c4..b67c9c95464 100644 --- a/vpr/src/place/net_cost_handler.h +++ 
b/vpr/src/place/net_cost_handler.h @@ -129,6 +129,7 @@ class NetCostHandler { double estimate_routing_chann_util(); private: + bool congestion_modeling_started_; ///@brief Specifies whether the bounding box is computed using cube method or per-layer method. bool cube_bb_; ///@brief Determines whether the FPGA has multiple dies (layers) From 90addfb93173fc064027be2dfc0d3b64ad9cb02f Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 18 Jun 2025 18:12:30 -0400 Subject: [PATCH 28/66] make format --- vpr/src/base/stats.cpp | 4 ++-- vpr/src/base/stats.h | 2 +- vpr/src/place/annealer.cpp | 4 +--- vpr/src/place/net_cost_handler.cpp | 16 ++++++---------- vpr/src/place/net_cost_handler.h | 1 - vpr/src/place/placer.cpp | 2 +- 6 files changed, 11 insertions(+), 18 deletions(-) diff --git a/vpr/src/base/stats.cpp b/vpr/src/base/stats.cpp index df5d0e99728..79e507c27d7 100644 --- a/vpr/src/base/stats.cpp +++ b/vpr/src/base/stats.cpp @@ -267,8 +267,8 @@ static void get_channel_occupancy_stats(const Netlist<>& net_list, bool /***/) { } void write_channel_occupancy_table(const std::string_view filename, - const vtr::Matrix& occupancy, - const std::vector& capacity_list) { + const vtr::Matrix& occupancy, + const std::vector& capacity_list) { constexpr int w_coord = 6; constexpr int w_value = 12; constexpr int w_percent = 12; diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h index 5fc00f0009a..93643384beb 100644 --- a/vpr/src/base/stats.h +++ b/vpr/src/base/stats.h @@ -69,4 +69,4 @@ void print_device_utilization(const float target_device_utilization); */ void write_channel_occupancy_table(const std::string_view filename, const vtr::Matrix& occupancy, - const std::vector& capacity_list); \ No newline at end of file + const std::vector& capacity_list); diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index a94941d96c9..f91a4a18d62 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -682,17 +682,15 @@ void 
PlacementAnnealer::outer_loop_update_timing_info() { || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_acceptance_rate_trigger) { costs_.congestion_cost = net_cost_handler_.estimate_routing_chann_util(); - if (!congestion_modeling_started_) { VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff); placer_opts_.congestion_factor = congestion_factor_; placer_opts_.congestion_factor /= 1.f + congestion_factor_; -// placer_opts_.congestion_factor /= 1.f + placer_opts_.congestion_factor; + // placer_opts_.congestion_factor /= 1.f + placer_opts_.congestion_factor; placer_opts_.timing_tradeoff /= 1.f + congestion_factor_; VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff); congestion_modeling_started_ = true; } - } // Update the cost normalization factors diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index c114d309c4f..7a3f24b962a 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -155,14 +155,14 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, alloc_and_load_chan_w_factors_for_place_cost_(); chanx_util_ = vtr::Matrix({{ - device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (length of x channel) - device_ctx.grid.height() //[0 .. device_ctx.grid.height() - 1] (# x channels) + device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (length of x channel) + device_ctx.grid.height() //[0 .. device_ctx.grid.height() - 1] (# x channels) }}, 0); chany_util_ = vtr::Matrix({{ - device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (# y channels) - device_ctx.grid.height() //[0 .. device_ctx.grid.height() - 1] (length of y channel) + device_ctx.grid.width(), //[0 .. device_ctx.grid.width() - 1] (# y channels) + device_ctx.grid.height() //[0 .. 
device_ctx.grid.height() - 1] (length of y channel) }}, 0); @@ -342,8 +342,6 @@ std::tuple NetCostHandler::comp_per_layer_bb_cost_(e_cos } } - - return {cost, expected_wirelength, cong_cost}; } @@ -1438,8 +1436,8 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold; float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold; -// return (distance_x * x_chan_cong) + (distance_y * y_chan_cong); - return x_chan_cong + y_chan_cong; + // return (distance_x * x_chan_cong) + (distance_y * y_chan_cong); + return x_chan_cong + y_chan_cong; } double NetCostHandler::get_net_per_layer_bb_cost_(ClusterNetId net_id, bool use_ts) { @@ -1772,7 +1770,6 @@ double NetCostHandler::estimate_routing_chann_util() { std::tie(chanx_width_, chany_width_) = calculate_channel_width(); } - for (size_t x = 0; x < chanx_util_.dim_size(0); ++x) { for (size_t y = 0; y < chanx_util_.dim_size(1); ++y) { if (chanx_width_[0][x][y] > 0) { @@ -1780,7 +1777,6 @@ double NetCostHandler::estimate_routing_chann_util() { } else { chanx_util_[x][y] = 1.; } - } } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index b67c9c95464..1d83b7ae5fa 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -251,7 +251,6 @@ class NetCostHandler { vtr::NdMatrix chanx_width_; vtr::NdMatrix chany_width_; - /** * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in * the cross-die-layer direction over a 2D (x,y) region. 
We don't assume the inter-die connectivity is the same at all (x,y) locations, so we diff --git a/vpr/src/place/placer.cpp b/vpr/src/place/placer.cpp index 944d8bcbb57..1fe8e837e3b 100644 --- a/vpr/src/place/placer.cpp +++ b/vpr/src/place/placer.cpp @@ -123,7 +123,7 @@ Placer::Placer(const Netlist<>& net_list, } else { VTR_ASSERT(placer_opts.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE); // Timing cost is not used - costs_.timing_cost = std::numeric_limits::quiet_NaN();; + costs_.timing_cost = std::numeric_limits::quiet_NaN(); } costs_.update_norm_factors(); From 08d48b0ab46721d1890662530bad8abbddc43f77 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Wed, 2 Jul 2025 17:31:46 -0400 Subject: [PATCH 29/66] make format --- vpr/src/place/net_cost_handler.cpp | 66 ++++++++++++++++-------------- 1 file changed, 35 insertions(+), 31 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index bf304b762bd..64cb2c7287b 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -96,7 +96,9 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, , placer_opts_(placer_opts) { const auto& device_ctx = g_vpr_ctx.device(); - const int num_layers = device_ctx.grid.get_num_layers(); + const size_t grid_width = device_ctx.grid.width(); + const size_t grid_height = device_ctx.grid.height(); + const size_t num_layers = device_ctx.grid.get_num_layers(); const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); is_multi_layer_ = num_layers > 1; @@ -147,25 +149,20 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, alloc_and_load_chan_w_factors_for_place_cost_(); - chanx_util_ = vtr::NdMatrix({{(size_t)device_ctx.grid.get_num_layers(), - device_ctx.grid.width(), - device_ctx.grid.height()}}, - 0); - - chany_util_ = vtr::NdMatrix({{(size_t)device_ctx.grid.get_num_layers(), - device_ctx.grid.width(), - device_ctx.grid.height()}}, - 0); - - acc_chanx_util_ = 
vtr::PrefixSum2D(chanx_util_.dim_size(1), chanx_util_.dim_size(2), - [&](size_t x, size_t y) -> double { - return chanx_util_[0][x][y]; - }, 0); - - acc_chany_util_ = vtr::PrefixSum2D(chany_util_.dim_size(1), chany_util_.dim_size(2), - [&](size_t x, size_t y) -> double { - return chany_util_[0][x][y]; - }, 0); + chanx_util_ = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); + chany_util_ = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); + + acc_chanx_util_ = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chanx_util_[0][x][y]; + }); + + acc_chany_util_ = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chany_util_[0][x][y]; + }); } void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { @@ -1426,8 +1423,8 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id]; -// int distance_x = bb.xmax - bb.xmin + 1; -// int distance_y = bb.ymax - bb.ymin + 1; + // int distance_x = bb.xmax - bb.xmin + 1; + // int distance_y = bb.ymax - bb.ymin + 1; const float threshold = placer_opts_.congestion_chan_util_threshold; @@ -1730,6 +1727,11 @@ double NetCostHandler::get_total_wirelength_estimate() const { double NetCostHandler::estimate_routing_chan_util() { const auto& cluster_ctx = g_vpr_ctx.clustering(); + const DeviceContext& device_ctx = g_vpr_ctx.device(); + + const size_t grid_width = device_ctx.grid.width(); + const size_t grid_height = device_ctx.grid.height(); + const size_t num_layers = device_ctx.grid.get_num_layers(); chanx_util_.fill(0.); chany_util_.fill(0.); @@ -1831,15 +1833,17 @@ double NetCostHandler::estimate_routing_chan_util() { // For now, congestion modeling in the placement stage is limited to a single die // TODO: extend it to multiple dice - acc_chanx_util_ = vtr::PrefixSum2D(chanx_util_.dim_size(1), chanx_util_.dim_size(2), - [&](size_t x, size_t y) -> 
double { - return chanx_util_[0][x][y]; - }, 0); - - acc_chany_util_ = vtr::PrefixSum2D(chany_util_.dim_size(1), chany_util_.dim_size(2), - [&](size_t x, size_t y) -> double { - return chany_util_[0][x][y]; - }, 0); + acc_chanx_util_ = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chanx_util_[0][x][y]; + }); + + acc_chany_util_ = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chany_util_[0][x][y]; + }); congestion_modeling_started_ = true; From 4c635b8784d654b39479f05429db1b1b91a37398 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 3 Jul 2025 11:18:51 -0400 Subject: [PATCH 30/66] add doxygen comments for congestion cost terms --- vpr/src/place/place_util.cpp | 2 +- vpr/src/place/place_util.h | 76 +++++++++++++++++------------------- 2 files changed, 37 insertions(+), 41 deletions(-) diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index f65f00aa5a2..9b839e646b8 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -22,7 +22,7 @@ void t_placer_costs::update_norm_factors() { } if (place_algorithm.is_timing_driven()) { - //Prevent the norm factor from going to infinity + // Prevent the norm factor from going to infinity timing_cost_norm = std::min(1 / timing_cost, MAX_INV_TIMING_COST); } else { // Timing normalization factor is not used diff --git a/vpr/src/place/place_util.h b/vpr/src/place/place_util.h index c3d6a6669d4..0caa10b8d57 100644 --- a/vpr/src/place/place_util.h +++ b/vpr/src/place/place_util.h @@ -63,39 +63,33 @@ struct NocCostTerms { * values of the previous iteration. However, the divisions are expensive, * so we store their multiplicative inverses when they are updated in * the outer loop routines to speed up the normalization process. - * - * @param cost The weighted average of the wiring cost and the timing cost. - * @param bb_cost The bounding box cost, aka the wiring cost. 
- * @param timing_cost The timing cost, which is connection delay * criticality. - * - * @param bb_cost_norm The normalization factor for the wiring cost. - * @param timing_cost_norm The normalization factor for the timing cost, which - * is upper-bounded by the value of MAX_INV_TIMING_COST. - * - * @param noc_cost_terms NoC-related cost terms - * @param noc_cost_norm_factors Normalization factors for NoC-related cost terms. - * - * @param MAX_INV_TIMING_COST Stops inverse timing cost from going to infinity - * with very lax timing constraints, which avoids multiplying by a - * gigantic timing_cost_norm when auto-normalizing. The exact value - * of this cost has relatively little impact, but should be large - * enough to not affect the timing costs computation for normal - * constraints. - * - * @param place_algorithm Determines how the member values are updated upon - * each temperature change during the placer annealing process. */ class t_placer_costs { public: //members + /// The weighted average of the wiring cost, the timing cost, and the congestion cost (if enabled) double cost = 0.; + + /// The bounding box cost, aka the wiring cost. double bb_cost = 0.; + + /// The timing cost, which is connection delay * criticality. double timing_cost = 0.; + + /// The congestion cost, which estimates how much routing channels are over-utilized. double congestion_cost = 0.; + + /// The normalization factor for the wiring cost. double bb_cost_norm = 0.; + + /// The normalization factor for the timing cost, which is upper-bounded by the value of MAX_INV_TIMING_COST. double timing_cost_norm = 0.; + + /// The normalization factor for the congestion cost. double congestion_cost_norm = 0.; + /// NoC-related cost terms. NocCostTerms noc_cost_terms; + /// Normalization factors for NoC-related cost terms. 
NocCostTerms noc_cost_norm_factors; public: //Constructor @@ -133,7 +127,18 @@ class t_placer_costs { t_placer_costs& operator+=(const NocCostTerms& noc_delta_cost); private: + /** + * @brief Stops inverse timing cost from going to infinity + * with very lax timing constraints, which avoids multiplying by a + * gigantic timing_cost_norm when auto-normalizing. The exact value + * of this cost has relatively little impact, but should be large + * enough to not affect the timing costs computation for normal + * constraints. + */ static constexpr double MAX_INV_TIMING_COST = 1.e12; + + /// Determines how the member values are updated upon + /// each temperature change during the placer annealing process. t_place_algorithm place_algorithm; bool noc_enabled; }; @@ -150,39 +155,30 @@ class t_placer_costs { * In terms of calculating statistics for total cost, we mean that we * operate upon the set of placer cost values gathered after every * accepted block move. - * - * @param av_cost - * Average total cost. Cost formulation depends on - * the place algorithm currently being used. - * @param av_bb_cost - * Average bounding box (wiring) cost. - * @param av_timing_cost - * Average timing cost (delay * criticality). - * @param sum_of_squares - * Sum of squares of the total cost. - * @param success_num - * Number of accepted block swaps for the current iteration. - * @param success_rate - * num_accepted / total_trials for the current iteration. - * @param std_dev - * Standard deviation of the total cost. - * */ class t_placer_statistics { public: + /// Average total cost. Cost formulation depends on the place algorithm currently being used. double av_cost; + /// Average bounding box (wiring) cost. double av_bb_cost; + /// Average timing cost (delay * criticality). double av_timing_cost; + /// Average congestion cost. double av_cong_cost; + /// Sum of squares of the total cost. double sum_of_squares; + /// Number of accepted block swaps for the current iteration. 
int success_sum; + /// num_accepted / total_trials for the current iteration. float success_rate; + /// Standard deviation of the total cost. double std_dev; - public: //Constructor + public: // Constructor t_placer_statistics() { reset(); } - public: //Mutator + public: // Mutator ///@brief Clear all data fields. void reset(); From 3becbb669cac971edf2d52a2809e68d31307fb13 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 3 Jul 2025 11:55:43 -0400 Subject: [PATCH 31/66] rename congestion_acceptance_rate_trigger to congestion_rlim_trigger_ratio to be more consistent with what it actually does --- vpr/src/base/SetupVPR.cpp | 2 +- vpr/src/base/read_options.cpp | 2 +- vpr/src/base/read_options.h | 2 +- vpr/src/base/vpr_types.h | 2 +- vpr/src/place/annealer.cpp | 24 +++++++++++------------- vpr/src/place/net_cost_handler.cpp | 9 +++++---- vpr/src/place/place_util.cpp | 4 ++-- 7 files changed, 22 insertions(+), 23 deletions(-) diff --git a/vpr/src/base/SetupVPR.cpp b/vpr/src/base/SetupVPR.cpp index d567219a035..623efca577d 100644 --- a/vpr/src/base/SetupVPR.cpp +++ b/vpr/src/base/SetupVPR.cpp @@ -656,7 +656,7 @@ static void SetupPlacerOpts(const t_options& Options, t_placer_opts* PlacerOpts) PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff; PlacerOpts->congestion_factor = Options.place_congestion_factor; - PlacerOpts->congestion_acceptance_rate_trigger = Options.place_congestion_acceptance_rate_trigger; + PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio; PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold; /* Depends on PlacerOpts->place_algorithm */ diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index c49d062a835..b631145c5df 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2494,7 +2494,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("0.0") 
.show_in(argparse::ShowIn::HELP_ONLY); - place_timing_grp.add_argument(args.place_congestion_acceptance_rate_trigger, "--congestion_acceptance_rate_trigger") + place_timing_grp.add_argument(args.place_congestion_rlim_trigger_ratio, "--congestion_rlim_trigger_ratio") .help("To be written") .default_value("0.0") .show_in(argparse::ShowIn::HELP_ONLY); diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index babfc26396c..267dd2ab8cf 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -188,7 +188,7 @@ struct t_options { /* Timing-driven placement options only */ argparse::ArgValue PlaceTimingTradeoff; argparse::ArgValue place_congestion_factor; - argparse::ArgValue place_congestion_acceptance_rate_trigger; + argparse::ArgValue place_congestion_rlim_trigger_ratio; argparse::ArgValue place_congestion_chan_util_threshold; argparse::ArgValue RecomputeCritIter; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 3218d23dc06..badd2174c88 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1014,7 +1014,7 @@ struct t_placer_opts { t_annealing_sched anneal_sched; /// 0. && rng_.frand() < placer_opts_.rlim_escape_fraction) { rlim = std::numeric_limits::infinity(); @@ -401,19 +399,19 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, router_block_move = check_for_router_swap(noc_opts_.noc_swap_percentage, rng_); } - //When manual move toggle button is active, the manual move window asks the user for input. + // When manual move toggle button is active, the manual move window asks the user for input. 
if (manual_move_enabled) { #ifndef NO_GRAPHICS create_move_outcome = manual_move_display_and_propose(manual_move_generator_, blocks_affected_, proposed_action.move_type, rlim, placer_opts_, criticalities_); -#endif //NO_GRAPHICS +#endif // NO_GRAPHICS } else if (router_block_move) { // generate a move where two random router blocks are swapped create_move_outcome = propose_router_swap(blocks_affected_, rlim, blk_loc_registry, place_macros_, rng_); proposed_action.move_type = e_move_type::UNIFORM; } else { - //Generate a new move (perturbation) used to explore the space of possible placements + // Generate a new move (perturbation) used to explore the space of possible placements create_move_outcome = move_generator.propose_move(blocks_affected_, proposed_action, rlim, placer_opts_, criticalities_); } @@ -681,7 +679,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() { } if (congestion_modeling_started_ - || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_acceptance_rate_trigger) { + || (annealing_state_.rlim / MoveGenerator::first_rlim) < placer_opts_.congestion_rlim_trigger_ratio) { costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util(); if (!congestion_modeling_started_) { diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 64cb2c7287b..44355b3b8f7 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1421,7 +1421,7 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) VTR_ASSERT_SAFE(congestion_modeling_started_); const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; - const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id]; + // const t_bb& bb = use_ts ? 
ts_bb_coord_new_[net_id] : bb_coords_[net_id]; // int distance_x = bb.xmax - bb.xmin + 1; // int distance_y = bb.ymax - bb.ymin + 1; @@ -1802,6 +1802,7 @@ double NetCostHandler::estimate_routing_chan_util() { } } + // Channel width is computed only once and reused in later calls. if (chanx_width_.empty()) { VTR_ASSERT(chany_width_.empty()); std::tie(chanx_width_, chany_width_) = calculate_channel_width(); @@ -1811,9 +1812,9 @@ double NetCostHandler::estimate_routing_chan_util() { VTR_ASSERT(chanx_util_.size() == chanx_width_.size()); VTR_ASSERT(chany_util_.size() == chany_width_.size()); - for (size_t layer = 0; layer < chanx_util_.dim_size(0); ++layer) { - for (size_t x = 0; x < chanx_util_.dim_size(1); ++x) { - for (size_t y = 0; y < chanx_util_.dim_size(2); ++y) { + for (size_t layer = 0; layer < num_layers; ++layer) { + for (size_t x = 0; x < grid_width; ++x) { + for (size_t y = 0; y < grid_height; ++y) { if (chanx_width_[layer][x][y] > 0) { chanx_util_[layer][x][y] /= chanx_width_[layer][x][y]; } else { diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index 9b839e646b8..c23029b0e00 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -11,7 +11,7 @@ #include "noc_place_utils.h" void t_placer_costs::update_norm_factors() { - const auto& clustered_nlist = g_vpr_ctx.clustering().clb_nlist; + const ClusteredNetlist& clustered_nlist = g_vpr_ctx.clustering().clb_nlist; bb_cost_norm = 1 / bb_cost; @@ -76,7 +76,7 @@ int get_place_inner_loop_num_move(const t_placer_opts& placer_opts, const t_anne move_lim = int(annealing_sched.inner_num * pow(device_size, 2. / 3.) * pow(num_blocks, 2. 
/ 3.)); } - /* Avoid having a non-positive move_lim */ + // Avoid having a non-positive move_lim move_lim = std::max(move_lim, 1); return move_lim; From ae67ef82e09b9bcd8206438cdef41b8fad64983d Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 3 Jul 2025 12:08:16 -0400 Subject: [PATCH 32/66] update t_exit to avoid cost factor normalization when congestion modeling is enabled --- vpr/src/place/annealer.cpp | 21 ++++++++++----------- vpr/src/place/annealer.h | 4 ++-- vpr/src/place/place_util.cpp | 2 +- 3 files changed, 13 insertions(+), 14 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 81597c13205..40655f73cc6 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -118,6 +118,7 @@ t_annealing_state::t_annealing_state(float first_t, } bool t_annealing_state::outer_loop_update(float success_rate, + bool congestion_modeling_enabled, const t_placer_costs& costs, const t_placer_opts& placer_opts) { #ifndef NO_GRAPHICS @@ -140,7 +141,12 @@ bool t_annealing_state::outer_loop_update(float success_rate, // Automatically determine exit temperature. const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); - float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); + float t_exit; + if (congestion_modeling_enabled) { + t_exit = 0.005 * (1. + placer_opts.congestion_factor) * costs.cost / cluster_ctx.clb_nlist.nets().size(); + } else { + t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); + } VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED); // Automatically adjust alpha according to success rate. 
@@ -232,8 +238,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, , congestion_modeling_started_(false) { const auto& device_ctx = g_vpr_ctx.device(); - congestion_factor_ = placer_opts_.congestion_factor; - placer_opts_.congestion_factor = 0.; float first_crit_exponent; if (placer_opts.place_algorithm.is_timing_driven()) { @@ -474,7 +478,7 @@ e_move_result PlacementAnnealer::try_swap_(MoveGenerator& move_generator, placer_opts_.timing_tradeoff, timing_delta_c, costs_.timing_cost_norm); - delta_c = (1 - placer_opts_.timing_tradeoff - placer_opts_.congestion_factor) * bb_delta_c * costs_.bb_cost_norm + delta_c = (1 - placer_opts_.timing_tradeoff) * bb_delta_c * costs_.bb_cost_norm + placer_opts_.timing_tradeoff * timing_delta_c * costs_.timing_cost_norm + placer_opts_.congestion_factor * congestion_delta_c * costs_.congestion_cost_norm; } else if (place_algorithm == e_place_algorithm::SLACK_TIMING_PLACE) { @@ -683,12 +687,7 @@ void PlacementAnnealer::outer_loop_update_timing_info() { costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util(); if (!congestion_modeling_started_) { - VTR_LOG("Congestion modeling started. %f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff); - placer_opts_.congestion_factor = congestion_factor_; - placer_opts_.congestion_factor /= 1.f + congestion_factor_; - // placer_opts_.congestion_factor /= 1.f + placer_opts_.congestion_factor; - placer_opts_.timing_tradeoff /= 1.f + congestion_factor_; - VTR_LOG("Congestion modeling started. 
%f %f\n", placer_opts_.congestion_factor, placer_opts_.timing_tradeoff); + VTR_LOG("Congestion modeling started.\n"); congestion_modeling_started_ = true; } } @@ -803,7 +802,7 @@ const t_annealing_state& PlacementAnnealer::get_annealing_state() const { } bool PlacementAnnealer::outer_loop_update_state() { - return annealing_state_.outer_loop_update(placer_stats_.success_rate, costs_, placer_opts_); + return annealing_state_.outer_loop_update(placer_stats_.success_rate, congestion_modeling_started_, costs_, placer_opts_); } void PlacementAnnealer::start_quench() { diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index 324a633c083..c7183cce9a3 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -105,6 +105,7 @@ class t_annealing_state { * @return True->continues the annealing. False->exits the annealing. */ bool outer_loop_update(float success_rate, + bool congestion_modeling_enabled, const t_placer_costs& costs, const t_placer_opts& placer_opts); @@ -269,8 +270,7 @@ class PlacementAnnealer { float estimate_starting_temperature_(); private: - t_placer_opts placer_opts_; - float congestion_factor_; + const t_placer_opts& placer_opts_; PlacerState& placer_state_; const PlaceMacros& place_macros_; diff --git a/vpr/src/place/place_util.cpp b/vpr/src/place/place_util.cpp index c23029b0e00..e206037bb3e 100644 --- a/vpr/src/place/place_util.cpp +++ b/vpr/src/place/place_util.cpp @@ -42,7 +42,7 @@ double t_placer_costs::get_total_cost(const t_placer_opts& placer_opts, const t_ total_cost = bb_cost * bb_cost_norm; } else if (placer_opts.place_algorithm.is_timing_driven()) { // in timing mode we include both wirelength and timing costs - total_cost = (1 - placer_opts.timing_tradeoff - placer_opts.congestion_factor) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * timing_cost_norm); + total_cost = (1 - placer_opts.timing_tradeoff) * (bb_cost * bb_cost_norm) + (placer_opts.timing_tradeoff) * (timing_cost * 
timing_cost_norm); } total_cost += placer_opts.congestion_factor * congestion_cost * congestion_cost_norm; From 73dfa44de1e8c510560d11b8bc3779578c1050eb Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Thu, 3 Jul 2025 13:33:57 -0400 Subject: [PATCH 33/66] make format --- vpr/src/place/annealer.cpp | 7 ++----- vpr/src/place/annealer.h | 1 - 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 40655f73cc6..845b109cbb5 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -141,11 +141,9 @@ bool t_annealing_state::outer_loop_update(float success_rate, // Automatically determine exit temperature. const ClusteringContext& cluster_ctx = g_vpr_ctx.clustering(); - float t_exit; + float t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); if (congestion_modeling_enabled) { - t_exit = 0.005 * (1. + placer_opts.congestion_factor) * costs.cost / cluster_ctx.clb_nlist.nets().size(); - } else { - t_exit = 0.005 * costs.cost / cluster_ctx.clb_nlist.nets().size(); + t_exit *= (1. 
+ placer_opts.congestion_factor); } VTR_ASSERT_SAFE(placer_opts.anneal_sched.type == e_sched_type::AUTO_SCHED); @@ -238,7 +236,6 @@ PlacementAnnealer::PlacementAnnealer(const t_placer_opts& placer_opts, , congestion_modeling_started_(false) { const auto& device_ctx = g_vpr_ctx.device(); - float first_crit_exponent; if (placer_opts.place_algorithm.is_timing_driven()) { first_crit_exponent = placer_opts.td_place_exp_first; /*this will be modified when rlim starts to change */ diff --git a/vpr/src/place/annealer.h b/vpr/src/place/annealer.h index c7183cce9a3..e406581956e 100644 --- a/vpr/src/place/annealer.h +++ b/vpr/src/place/annealer.h @@ -271,7 +271,6 @@ class PlacementAnnealer { private: const t_placer_opts& placer_opts_; - PlacerState& placer_state_; const PlaceMacros& place_macros_; /// Stores different placement cost terms From b1b52f6ef9032ac81baa9a260283f0544cc85c8b Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Mon, 7 Jul 2025 15:45:30 -0400 Subject: [PATCH 34/66] assume cube mode for routing chan util estimation in the routing stage --- vpr/src/route/route_common.cpp | 3 +-- vpr/src/route/route_utilization.cpp | 4 ++-- vpr/src/route/route_utilization.h | 4 ++-- 3 files changed, 5 insertions(+), 6 deletions(-) diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 3b0dc9344bf..0e452808d69 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -481,11 +481,10 @@ void reset_rr_node_route_structs(const t_router_opts& route_opts) { auto& route_ctx = g_vpr_ctx.mutable_routing(); const auto& device_ctx = g_vpr_ctx.device(); const auto& blk_loc_registry = g_vpr_ctx.placement().blk_loc_registry(); - const bool cube_bb = g_vpr_ctx.placement().cube_bb; VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_graph.num_nodes())); - RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry, cube_bb); + RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry); const auto 
[chanx_util, chany_util] = routing_chan_util_estimator.estimate_routing_chan_util(); for (const RRNodeId rr_id : device_ctx.rr_graph.nodes()) { diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp index a5dc3b0b4fc..5127f6e8db8 100644 --- a/vpr/src/route/route_utilization.cpp +++ b/vpr/src/route/route_utilization.cpp @@ -5,12 +5,12 @@ #include "vpr_utils.h" #include "route_common.h" -RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry, bool cube_bb) { +RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry) { placer_state_ = std::make_unique(/*placement_is_timing_driven=*/false); placer_state_->mutable_blk_loc_registry() = blk_loc_registry; placer_opts_.place_algorithm = e_place_algorithm::BOUNDING_BOX_PLACE; - net_cost_handler_ = std::make_unique(placer_opts_, *placer_state_, cube_bb); + net_cost_handler_ = std::make_unique(placer_opts_, *placer_state_, /*cube_bb=*/true); } std::pair, vtr::NdMatrix> RoutingChanUtilEstimator::estimate_routing_chan_util() { diff --git a/vpr/src/route/route_utilization.h b/vpr/src/route/route_utilization.h index 8e71e73375d..c23897efe6d 100644 --- a/vpr/src/route/route_utilization.h +++ b/vpr/src/route/route_utilization.h @@ -8,13 +8,13 @@ /** * @class RoutingChanUtilEstimator - * @brief This class computes the net bounding boxes and estimates the routing channel utilization + * @brief This class computes the net bounding boxes (cube mode) and estimates the routing channel utilization * for each CHANX/CHANY channel by smearing the estimated wirelength for each net across all channels * within its bounding box. 
*/ class RoutingChanUtilEstimator { public: - RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry, bool cube_bb); + RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry); std::pair, vtr::NdMatrix> estimate_routing_chan_util(); From 5b94af747aeb08870d9579f473f0410d12c9260d Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 12:03:08 -0400 Subject: [PATCH 35/66] inline doxygen comments for data members of t_placer_opts and t_ap_opts --- vpr/src/analytical_place/ap_flow_enums.h | 2 +- vpr/src/base/vpr_types.h | 160 +++++++++++------------ 2 files changed, 74 insertions(+), 88 deletions(-) diff --git a/vpr/src/analytical_place/ap_flow_enums.h b/vpr/src/analytical_place/ap_flow_enums.h index da47927d5d1..707a842ae8d 100644 --- a/vpr/src/analytical_place/ap_flow_enums.h +++ b/vpr/src/analytical_place/ap_flow_enums.h @@ -27,7 +27,7 @@ enum class e_ap_analytical_solver { */ enum class e_ap_partial_legalizer { BiPartitioning, ///< Partial Legalizer which forms minimum windows around dense regions and uses bipartitioning to spread blocks over windows. - FlowBased ///> Partial Legalizer which flows blocks from overfilled bins to underfilled bins. + FlowBased ///< Partial Legalizer which flows blocks from overfilled bins to underfilled bins. }; /** diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index badd2174c88..ac9eef9d2f9 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -954,88 +954,77 @@ enum class e_move_type; /** * @brief Various options for the placer. - * - * @param place_algorithm - * Controls which placement algorithm is used. - * @param place_quench_algorithm - * Controls which placement algorithm is used - * during placement quench. - * @param timing_tradeoff - * When in CRITICALITY_TIMING_PLACE mode, what is the - * tradeoff between timing and wiring costs. - * @param place_chan_width - * The channel width assumed if only one placement is performed. 
- * @param pad_loc_type - * Are pins FREE or fixed randomly. - * @param constraints_file - * File that specifies locations of locked down (constrained) - * blocks for placement. Empty string means no constraints file. - * @param write_initial_place_file - * Write the initial placement into this file. Empty string means - * the initial placement is not written. - * @param pad_loc_file - * File to read pad locations from if pad_loc_type is USER. - * @param place_freq - * Should the placement be skipped, done once, or done - * for each channel width in the binary search. (Default: ONCE) - * @param recompute_crit_iter - * How many temperature stages pass before we recompute - * criticalities based on the current placement and its - * estimated point-to-point delays. - * @param inner_loop_crit_divider - * (move_lim/inner_loop_crit_divider) determines how - * many inner_loop iterations pass before a recompute - * of criticalities is done. - * @param td_place_exp_first - * Exponent that is used in the CRITICALITY_TIMING_PLACE - * mode to specify the initial value of `crit_exponent`. - * After we map the slacks to criticalities, this value - * is used to `sharpen` the criticalities, making connections - * with worse slacks more critical. - * @param td_place_exp_last - * Value that the crit_exponent will be at the end. - * @param doPlacement - * True if placement is supposed to be done in the CAD flow. - * False if otherwise. - * @param place_constraint_expand - * Integer value that specifies how far to expand the floorplan - * region when printing out floorplan constraints based on - * current placement. - * @param place_constraint_subtile - * True if subtiles should be specified when printing floorplan - * constraints. False if not. - * @param place_auto_init_t_scale - * When the annealer is using the automatic schedule, this option - * scales the initial temperature selected. */ struct t_placer_opts { + /// Controls which placement algorithm is used. 
t_place_algorithm place_algorithm; + + /// Controls which placement algorithm is used during placement quench. t_place_algorithm place_quench_algorithm; - t_annealing_sched anneal_sched; /// place_static_move_prob; + bool RL_agent_placement; bool place_agent_multistate; bool place_checkpointing; + int place_high_fanout_net; + e_place_bounding_box_mode place_bounding_box_mode; + e_agent_algorithm place_agent_algorithm; + float place_agent_epsilon; float place_agent_gamma; float place_dm_rlim; + e_agent_space place_agent_space; + std::string place_reward_fun; + float place_crit_limit; + + + /// Integer value that specifies how far to expand the floorplan region when + /// printing out floorplan constraints based on current placement. int place_constraint_expand; + + /// True if subtiles should be specified when printing floorplan constraints. False if not. bool place_constraint_subtile; + int floorplan_num_horizontal_partitions; int floorplan_num_vertical_partitions; + bool place_quench_only; int placer_debug_block; @@ -1082,6 +1088,7 @@ struct t_placer_opts { e_place_delta_delay_algorithm place_delta_delay_matrix_calculation_method; + /// When the annealer is using the automatic schedule, this option scales the initial temperature selected. float place_auto_init_t_scale; }; @@ -1091,63 +1098,42 @@ struct t_placer_opts { /** * @brief Various options for the Analytical Placer. - * - * @param doAnalyticalPlacement - * True if analytical placement is supposed to be done in the CAD - * flow. False if otherwise. - * @param analytical_solver_type - * The type of analytical solver the Global Placer in the AP flow - * will use. - * @param partial_legalizer_type - * The type of partial legalizer the Global Placer in the AP flow - * will use. - * @param full_legalizer_type - * The type of full legalizer the AP flow will use. - * @param detailed_placer_type - * The type of detailed placter the AP flow will use. 
- * @param ap_timing_tradeoff - * A trade-off parameter used to decide how focused the AP flow - * should be on optimizing timing over wirelength. - * @param ap_high_fanout_threshold; - * The threshold to ignore nets with higher fanout than that - * value while constructing the solver. - * @param ap_partial_legalizer_target_density - * Vector of strings passed by the user to configure the target - * density of different physical tiles on the device. - * @param appack_max_dist_th - * Array of string passed by the user to configure the max candidate - * distance thresholds. - * @param num_threads - * The number of threads the AP flow can use. - * @param log_verbosity - * The verbosity level of log messages in the AP flow, with higher - * values leading to more verbose messages. - * @param generate_mass_report - * Whether to generate a mass report during global placement or not. */ struct t_ap_opts { + /// True if analytical placement is supposed to be done in the CAD flow. False if otherwise. e_stage_action doAP; + /// The type of analytical solver the Global Placer in the AP flow will use. e_ap_analytical_solver analytical_solver_type; + /// The type of partial legalizer the Global Placer in the AP flow will use. e_ap_partial_legalizer partial_legalizer_type; + /// The type of full legalizer the AP flow will use. e_ap_full_legalizer full_legalizer_type; + /// The type of detailed placer the AP flow will use. e_ap_detailed_placer detailed_placer_type; + /// A trade-off parameter used to decide how focused the AP flow should be on optimizing timing over wirelength. float ap_timing_tradeoff; + /// The threshold to ignore nets with higher fanout than that value while constructing the solver. int ap_high_fanout_threshold; + /// Vector of strings passed by the user to configure the target density of different physical tiles on the device. 
std::vector ap_partial_legalizer_target_density; + /// Array of string passed by the user to configure the max candidate distance thresholds. std::vector appack_max_dist_th; + /// The number of threads the AP flow can use. unsigned num_threads; + /// The verbosity level of log messages in the AP flow, with higher values leading to more verbose messages. int log_verbosity; + /// Whether to generate a mass report during global placement or not. bool generate_mass_report; }; From 2703a644ac7df3b520a1be6ead03d771e8ecc0b4 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 12:10:18 -0400 Subject: [PATCH 36/66] fix valgrind issue (using uninitialized variable) --- vpr/src/place/net_cost_handler.cpp | 12 +++++++----- vpr/src/place/net_cost_handler.h | 2 +- vpr/src/route/route_utilization.cpp | 2 +- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 44355b3b8f7..c2356c2de8a 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1725,7 +1725,7 @@ double NetCostHandler::get_total_wirelength_estimate() const { return estimated_wirelength; } -double NetCostHandler::estimate_routing_chan_util() { +double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost/* = true*/) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const DeviceContext& device_ctx = g_vpr_ctx.device(); @@ -1850,10 +1850,12 @@ double NetCostHandler::estimate_routing_chan_util() { double cong_cost = 0.; // Compute congestion cost using recomputed bounding boxes and channel utilization map - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false); - cong_cost += net_cong_cost_[net_id]; + if (compute_congestion_cost) { + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if 
(!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + net_cong_cost_[net_id] = get_net_cube_cong_cost_(net_id, /*use_ts=*/false); + cong_cost += net_cong_cost_[net_id]; + } } } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 70e6cad3eb9..d107e8be4df 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -143,7 +143,7 @@ class NetCostHandler { * * @return Total congestion cost. */ - double estimate_routing_chan_util(); + double estimate_routing_chan_util(bool compute_congestion_cost = true); std::pair&, const vtr::NdMatrix&> get_chanxy_util() const; diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp index 5127f6e8db8..990559269d1 100644 --- a/vpr/src/route/route_utilization.cpp +++ b/vpr/src/route/route_utilization.cpp @@ -24,7 +24,7 @@ std::pair, vtr::NdMatrix> RoutingChanUtilEst net_cost_handler_->comp_bb_cong_cost(e_cost_methods::NORMAL); // Estimate routing channel utilization using - net_cost_handler_->estimate_routing_chan_util(); + net_cost_handler_->estimate_routing_chan_util(/*compute_congestion_cost=*/false); return net_cost_handler_->get_chanxy_util(); } else { From 48f725cad17d261e0a620e1ae48039f34fafd218 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 15:14:31 -0400 Subject: [PATCH 37/66] make format --- vpr/src/base/vpr_types.h | 1 - vpr/src/place/net_cost_handler.cpp | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index eb10ea3ad2c..a92515703b2 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1062,7 +1062,6 @@ struct t_placer_opts { float place_crit_limit; - /// Integer value that specifies how far to expand the floorplan region when /// printing out floorplan constraints based on current placement. 
int place_constraint_expand; diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 14445be60ef..8e78340da9c 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1723,7 +1723,7 @@ double NetCostHandler::get_total_wirelength_estimate() const { return estimated_wirelength; } -double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost/* = true*/) { +double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /* = true*/) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const DeviceContext& device_ctx = g_vpr_ctx.device(); From 21c3318822137de806f62ff1c451d95e01ebffe7 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 15:36:01 -0400 Subject: [PATCH 38/66] doxygen comments for some members of t_router_opts --- vpr/src/base/vpr_types.h | 86 ++++++++++++++++------------------------ 1 file changed, 35 insertions(+), 51 deletions(-) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index a92515703b2..82f6ec0b390 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -1140,56 +1140,6 @@ struct t_ap_opts { * Router data types *******************************************************************/ -/* All the parameters controlling the router's operation are in this * - * structure. * - * first_iter_pres_fac: Present sharing penalty factor used for the * - * very first (congestion mapping) Pathfinder iteration. * - * initial_pres_fac: Initial present sharing penalty factor for * - * Pathfinder; used to set pres_fac on 2nd iteration. * - * pres_fac_mult: Amount by which pres_fac is multiplied each * - * routing iteration. * - * acc_fac: Historical congestion cost multiplier. Used unchanged * - * for all iterations. * - * bend_cost: Cost of a bend (usually non-zero only for global routing). * - * max_router_iterations: Maximum number of iterations before giving * - * up. 
* - * min_incremental_reroute_fanout: Minimum fanout a net needs to have * - * for incremental reroute to be applied to it through route * - * tree pruning. Larger circuits should get larger thresholds * - * bb_factor: Linear distance a route can go outside the net bounding * - * box. * - * route_type: GLOBAL or DETAILED. * - * fixed_channel_width: Only attempt to route the design once, with the * - * channel width given. If this variable is * - * == NO_FIXED_CHANNEL_WIDTH, do a binary search * - * on channel width. * - * router_algorithm: TIMING_DRIVEN or PARALLEL. Selects the desired * - * routing algorithm. * - * base_cost_type: Specifies how to compute the base cost of each type of * - * rr_node. DELAY_NORMALIZED -> base_cost = "demand" * - * x average delay to route past 1 CLB. DEMAND_ONLY -> * - * expected demand of this node (old breadth-first costs). * - * * - * The following parameters are used only by the timing-driven router. * - * * - * astar_fac: Factor (alpha) used to weight expected future costs to * - * target in the timing_driven router. astar_fac = 0 leads to * - * an essentially breadth-first search, astar_fac = 1 is near * - * the usual astar algorithm and astar_fac > 1 are more * - * aggressive. * - * astar_offset: Offset that is subtracted from the lookahead (expected * - * future costs) in the timing-driven router. * - * max_criticality: The maximum criticality factor (from 0 to 1) any sink * - * will ever have (i.e. clip criticality to this number). * - * criticality_exp: Set criticality to (path_length(sink) / longest_path) ^ * - * criticality_exp (then clip to max_criticality). 
* - * doRouting: true if routing is supposed to be done, false otherwise * - * routing_failure_predictor: sets the configuration to be used by the * - * routing failure predictor, how aggressive the threshold used to judge * - * and abort routings deemed unroutable * - * write_rr_graph_name: stores the file name of the output rr graph * - * read_rr_graph_name: stores the file name of the rr graph to be read by vpr */ - enum e_router_algorithm { NESTED, PARALLEL, @@ -1249,25 +1199,54 @@ enum class e_incr_reroute_delay_ripup { constexpr int NO_FIXED_CHANNEL_WIDTH = -1; +/** + * @brief Parameters controlling the router's operation. + */ struct t_router_opts { bool read_rr_edge_metadata = false; bool do_check_rr_graph = true; + + /// Present sharing penalty factor used for the very first (congestion mapping) Pathfinder iteration. float first_iter_pres_fac; + /// Initial present sharing penalty factor for Pathfinder; used to set pres_fac on 2nd iteration. float initial_pres_fac; + /// Amount by which pres_fac is multiplied each routing iteration. float pres_fac_mult; float max_pres_fac; + + /// Historical congestion cost multiplier. Used unchanged for all iterations. float acc_fac; + /// Cost of a bend (usually non-zero only for global routing). float bend_cost; + /// Maximum number of iterations before giving up. int max_router_iterations; + /// Minimum fanout a net needs to have for incremental reroute to be applied to it through route tree pruning. + /// Larger circuits should get larger thresholds int min_incremental_reroute_fanout; e_incr_reroute_delay_ripup incr_reroute_delay_ripup; + /// Linear distance a route can go outside the net bounding box. int bb_factor; + /// GLOBAL or DETAILED. enum e_route_type route_type; + /// Only attempt to route the design once, with the channel width given. + /// If this variable is == NO_FIXED_CHANNEL_WIDTH, do a binary search on channel width. 
int fixed_channel_width; - int min_channel_width_hint; /// base_cost = "demand" x average delay to route past 1 CLB. + /// DEMAND_ONLY -> expected demand of this node (old breadth-first costs). enum e_base_cost_type base_cost_type; + + /// Factor (alpha) used to weight expected future costs to target in the timing_driven router. + /// astar_fac = 0 leads to an essentially breadth-first search, + /// astar_fac = 1 is near the usual astar algorithm and astar_fac > 1 are more aggressive. float astar_fac; + + /// Offset that is subtracted from the lookahead (expected future costs) in the timing-driven router. float astar_offset; float router_profiler_astar_fac; bool enable_parallel_connection_router; @@ -1276,7 +1255,9 @@ struct t_router_opts { int multi_queue_num_threads; int multi_queue_num_queues; bool multi_queue_direct_draining; + /// The maximum criticality factor (from 0 to 1) any sink will ever have (i.e. clip criticality to this number). float max_criticality; + /// Set criticality to (path_length(sink) / longest_path) ^ criticality_exp (then clip to max_criticality). 
float criticality_exp; float init_wirelength_abort_threshold; bool verify_binary_search; @@ -1284,7 +1265,10 @@ struct t_router_opts { bool congestion_analysis; bool fanout_analysis; bool switch_usage_analysis; + /// true if routing is supposed to be done, false otherwise e_stage_action doRouting; + /// the configuration to be used by the routing failure predictor, + /// how aggressive the threshold used to judge and abort routings deemed unroutable enum e_routing_failure_predictor routing_failure_predictor; enum e_routing_budgets_algorithm routing_budgets_algorithm; bool save_routing_per_iteration; From ce611db50e3ac44a446de5f3db84a916ba675ad8 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 18:16:11 -0400 Subject: [PATCH 39/66] comment congestion parameters in t_placer_opts --- vpr/src/base/vpr_types.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 82f6ec0b390..88eb287f576 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -968,8 +968,13 @@ struct t_placer_opts { /// When in CRITICALITY_TIMING_PLACE mode, what is the tradeoff between timing and wiring costs. float timing_tradeoff; + /// Weight for how much congestion affects placement cost. + /// Higher means congestion is more important. float congestion_factor; + /// Start using congestion cost when (current rlim / initial rlim) drops below this value. float congestion_rlim_trigger_ratio; + /// Nets with average channel usage (withing their bounding box) above this threshold + /// are predicted to face some congestion in the routing stage. float congestion_chan_util_threshold; /// The channel width assumed if only one placement is performed. 
From b47514bea87ecd835220990da138d87eaddcac3c Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 18:21:32 -0400 Subject: [PATCH 40/66] remove dead code and add doxygen comment for congestion_modeling_started --- vpr/src/place/net_cost_handler.cpp | 8 +------- vpr/src/place/net_cost_handler.h | 17 +++++++++-------- 2 files changed, 10 insertions(+), 15 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 8e78340da9c..b73e7840a57 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1419,17 +1419,11 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) VTR_ASSERT_SAFE(congestion_modeling_started_); const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; - // const t_bb& bb = use_ts ? ts_bb_coord_new_[net_id] : bb_coords_[net_id]; - - // int distance_x = bb.xmax - bb.xmin + 1; - // int distance_y = bb.ymax - bb.ymin + 1; - const float threshold = placer_opts_.congestion_chan_util_threshold; float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold; float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold; - - // return (distance_x * x_chan_cong) + (distance_y * y_chan_cong); + return x_chan_cong + y_chan_cong; } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index d107e8be4df..1970f49977c 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -148,22 +148,23 @@ class NetCostHandler { std::pair&, const vtr::NdMatrix&> get_chanxy_util() const; private: + /// Indicates whether congestion cost modeling is enabled. bool congestion_modeling_started_; - ///@brief Specifies whether the bounding box is computed using cube method or per-layer method. + /// Specifies whether the bounding box is computed using cube method or per-layer method. 
bool cube_bb_; - ///@brief Determines whether the FPGA has multiple dies (layers) + /// Determines whether the FPGA has multiple dies (layers) bool is_multi_layer_; - ///@brief A reference to the placer's state to be updated by this object. + /// A reference to the placer's state to be updated by this object. PlacerState& placer_state_; - ///@brief Contains some parameter that determine how the placement cost is computed. + /// Contains some parameter that determine how the placement cost is computed. const t_placer_opts& placer_opts_; - ///@brief Points to the proper method for computing the bounding box cost from scratch. + /// Points to the proper method for computing the bounding box cost from scratch. std::function(e_cost_methods method)> comp_bb_cong_cost_functor_; - ///@brief Points to the proper method for updating the bounding box of a net. + /// Points to the proper method for updating the bounding box of a net. std::function update_bb_functor_; - ///@brief Points to the proper method for getting the bounding box cost of a net + /// Points to the proper method for getting the bounding box cost of a net std::function get_net_bb_cost_functor_; - ///@brief Points to the proper method for getting the non-updatable bounding box of a net + /// Points to the proper method for getting the non-updatable bounding box of a net std::function get_non_updatable_bb_functor_; /** From 12286012f4beaa6bedd1040ee02749c4bd97ac33 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 19:13:50 -0400 Subject: [PATCH 41/66] add ChannelData struct --- vpr/src/place/net_cost_handler.cpp | 121 +++++++++++++++-------------- vpr/src/place/net_cost_handler.h | 23 +++--- 2 files changed, 74 insertions(+), 70 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index b73e7840a57..b88fbc87dad 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -128,7 +128,7 @@ 
NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, get_non_updatable_bb_functor_ = std::bind(&NetCostHandler::get_non_updatable_per_layer_bb_, this, std::placeholders::_1, /*use_ts=*/true); } - /* This initializes the whole matrix to OPEN which is an invalid value*/ + // This initializes the whole matrix to OPEN which is an invalid value ts_layer_sink_pin_count_.resize({num_nets, size_t(num_layers)}, OPEN); num_sink_pin_layer_.resize({num_nets, size_t(num_layers)}, OPEN); @@ -140,27 +140,27 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, net_cong_cost_.resize(num_nets, -1.); proposed_net_cong_cost_.resize(num_nets, -1.); - /* Used to store costs for moves not yet made and to indicate when a net's - * cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't - * been recomputed. */ + // Used to store costs for moves not yet made and to indicate when a net's + // cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't + // been recomputed. 
bb_update_status_.resize(num_nets, NetUpdateState::NOT_UPDATED_YET); alloc_and_load_chan_w_factors_for_place_cost_(); - chanx_util_ = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); - chany_util_ = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); + chan_util_.x = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); + chan_util_.y = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); - acc_chanx_util_ = vtr::PrefixSum2D(grid_width, - grid_height, - [&](size_t x, size_t y) { - return chanx_util_[0][x][y]; - }); + acc_chan_util_.x = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.x[0][x][y]; + }); - acc_chany_util_ = vtr::PrefixSum2D(grid_width, - grid_height, - [&](size_t x, size_t y) { - return chany_util_[0][x][y]; - }); + acc_chan_util_.y = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.y[0][x][y]; + }); } void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { @@ -178,25 +178,26 @@ void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { * This returns the total number of tracks between channels 'low' and 'high', * including tracks in these channels. */ - acc_chanx_width_ = vtr::PrefixSum1D(grid_height, [&](size_t y) noexcept { + acc_chan_width_.x = vtr::PrefixSum1D(grid_height, [&](size_t y) noexcept { int chan_x_width = device_ctx.chan_width.x_list[y]; - /* If the number of tracks in a channel is zero, two consecutive elements take the same - * value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this - * potential issue, we assume that the channel width is at least 1. - */ - if (chan_x_width == 0) + // If the number of tracks in a channel is zero, two consecutive elements take the same + // value. This can lead to a division by zero in get_chanxy_cost_fac_(). To avoid this + // potential issue, we assume that the channel width is at least 1. 
+ if (chan_x_width == 0) { return 1; + } return chan_x_width; }); - acc_chany_width_ = vtr::PrefixSum1D(grid_width, [&](size_t x) noexcept { + acc_chan_width_.y = vtr::PrefixSum1D(grid_width, [&](size_t x) noexcept { int chan_y_width = device_ctx.chan_width.y_list[x]; // to avoid a division by zero - if (chan_y_width == 0) + if (chan_y_width == 0) { return 1; + } return chan_y_width; }); @@ -276,8 +277,8 @@ std::tuple NetCostHandler::comp_cube_bb_cong_cost_(e_cos for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { - /* Small nets don't use incremental updating on their bounding boxes, - * so they can use a fast bounding box calculator. */ + // Small nets don't use incremental updating on their bounding boxes, + // so they can use a fast bounding box calculator. if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) { get_bb_from_scratch_(net_id, /*use_ts=*/false); } else { @@ -564,8 +565,8 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts if (congestion_modeling_started_) { auto& [x_chan_util, y_chan_util] = use_ts ? 
ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); - x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; - y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; } } @@ -877,8 +878,8 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, if (congestion_modeling_started_) { auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id]; const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); - x_chan_util = acc_chanx_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; - y_chan_util = acc_chany_util_.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; + y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; } } @@ -1318,8 +1319,8 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) { if (congestion_modeling_started_) { auto& [x_chan_util, y_chan_util] = use_ts ? 
ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1); - x_chan_util = acc_chanx_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; - y_chan_util = acc_chany_util_.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; + x_chan_util = acc_chan_util_.x.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; + y_chan_util = acc_chan_util_.y.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; } } @@ -1725,8 +1726,8 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / const size_t grid_height = device_ctx.grid.height(); const size_t num_layers = device_ctx.grid.get_num_layers(); - chanx_util_.fill(0.); - chany_util_.fill(0.); + chan_util_.x.fill(0.); + chan_util_.y.fill(0.); // For each net, this function estimates routing channel utilization by distributing // the net's expected wirelength across its bounding box. 
The expected wirelength @@ -1756,8 +1757,8 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / for (int layer = bb.layer_min; layer <= bb.layer_max; layer++) { for (int x = bb.xmin; x <= bb.xmax; x++) { for (int y = bb.ymin; y <= bb.ymax; y++) { - chanx_util_[layer][x][y] += expected_per_x_segment_wl; - chany_util_[layer][x][y] += expected_per_y_segment_wl; + chan_util_.x[layer][x][y] += expected_per_x_segment_wl; + chan_util_.y[layer][x][y] += expected_per_y_segment_wl; } } } @@ -1785,8 +1786,8 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / for (int x = bb[layer].xmin; x <= bb[layer].xmax; x++) { for (int y = bb[layer].ymin; y <= bb[layer].ymax; y++) { - chanx_util_[layer][x][y] += expected_per_x_segment_wl; - chany_util_[layer][x][y] += expected_per_y_segment_wl; + chan_util_.x[layer][x][y] += expected_per_x_segment_wl; + chan_util_.y[layer][x][y] += expected_per_y_segment_wl; } } } @@ -1795,30 +1796,30 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / } // Channel width is computed only once and reused in later calls. 
- if (chanx_width_.empty()) { - VTR_ASSERT(chany_width_.empty()); - std::tie(chanx_width_, chany_width_) = calculate_channel_width(); + if (chan_width_.x.empty()) { + VTR_ASSERT(chan_width_.y.empty()); + std::tie(chan_width_.x, chan_width_.y) = calculate_channel_width(); } - VTR_ASSERT(chanx_util_.size() == chany_util_.size()); - VTR_ASSERT(chanx_util_.size() == chanx_width_.size()); - VTR_ASSERT(chany_util_.size() == chany_width_.size()); + VTR_ASSERT(chan_util_.x.size() == chan_util_.y.size()); + VTR_ASSERT(chan_util_.x.size() == chan_width_.x.size()); + VTR_ASSERT(chan_util_.y.size() == chan_width_.y.size()); for (size_t layer = 0; layer < num_layers; ++layer) { for (size_t x = 0; x < grid_width; ++x) { for (size_t y = 0; y < grid_height; ++y) { - if (chanx_width_[layer][x][y] > 0) { - chanx_util_[layer][x][y] /= chanx_width_[layer][x][y]; + if (chan_width_.x[layer][x][y] > 0) { + chan_util_.x[layer][x][y] /= chan_width_.x[layer][x][y]; } else { - VTR_ASSERT_SAFE(chanx_width_[layer][x][y] == 0); - chanx_util_[layer][x][y] = 1.; + VTR_ASSERT_SAFE(chan_width_.x[layer][x][y] == 0); + chan_util_.x[layer][x][y] = 1.; } - if (chany_width_[layer][x][y] > 0) { - chany_util_[layer][x][y] /= chany_width_[layer][x][y]; + if (chan_width_.y[layer][x][y] > 0) { + chan_util_.y[layer][x][y] /= chan_width_.y[layer][x][y]; } else { - VTR_ASSERT_SAFE(chany_width_[layer][x][y] == 0); - chany_util_[layer][x][y] = 1.; + VTR_ASSERT_SAFE(chan_width_.y[layer][x][y] == 0); + chan_util_.y[layer][x][y] = 1.; } } } @@ -1826,16 +1827,16 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / // For now, congestion modeling in the placement stage is limited to a single die // TODO: extend it to multiple dice - acc_chanx_util_ = vtr::PrefixSum2D(grid_width, - grid_height, - [&](size_t x, size_t y) { - return chanx_util_[0][x][y]; - }); + acc_chan_util_.x = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.x[0][x][y]; + }); - 
acc_chany_util_ = vtr::PrefixSum2D(grid_width, + acc_chan_util_.y = vtr::PrefixSum2D(grid_width, grid_height, [&](size_t x, size_t y) { - return chany_util_[0][x][y]; + return chan_util_.y[0][x][y]; }); congestion_modeling_started_ = true; @@ -1855,7 +1856,7 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / } std::pair&, const vtr::NdMatrix&> NetCostHandler::get_chanxy_util() const { - return {chanx_util_, chany_util_}; + return {chan_util_.x, chan_util_.y}; } void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 1970f49977c..05fa6ba1ace 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -35,6 +35,13 @@ enum class e_cost_methods { CHECK }; +template +struct ChannelData { + T x; + T y; + // TODO: add Z dimension +}; + class NetCostHandler { public: NetCostHandler() = delete; @@ -259,17 +266,13 @@ class NetCostHandler { * number of tracks in that direction; for other cost functions they * will never be used. 
*/ - vtr::PrefixSum1D acc_chanx_width_; // [0..device_ctx.grid.width()-1] - vtr::PrefixSum1D acc_chany_width_; // [0..device_ctx.grid.height()-1] + ChannelData> acc_chan_width_; - vtr::PrefixSum2D acc_chanx_util_; - vtr::PrefixSum2D acc_chany_util_; + ChannelData> acc_chan_util_; - vtr::NdMatrix chanx_util_; - vtr::NdMatrix chany_util_; + ChannelData> chan_util_; - vtr::NdMatrix chanx_width_; - vtr::NdMatrix chany_width_; + ChannelData> chan_width_; /** * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in @@ -585,10 +588,10 @@ class NetCostHandler { */ template std::pair get_chanxy_cost_fac_(const BBT& bb) { - const int total_chanx_width = acc_chanx_width_.get_sum(bb.ymin, bb.ymax); + const int total_chanx_width = acc_chan_width_.x.get_sum(bb.ymin, bb.ymax); const double inverse_average_chanx_width = (bb.ymax - bb.ymin + 1.0) / total_chanx_width; - const int total_chany_width = acc_chany_width_.get_sum(bb.xmin, bb.xmax); + const int total_chany_width = acc_chan_width_.y.get_sum(bb.xmin, bb.xmax); const double inverse_average_chany_width = (bb.xmax - bb.xmin + 1.0) / total_chany_width; return {inverse_average_chanx_width, inverse_average_chany_width}; From 04a84cde8d64e4dbeb21a60b46500bcb24e1e823 Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 19:20:53 -0400 Subject: [PATCH 42/66] return ChannelData from get_chan_util() --- vpr/src/place/net_cost_handler.cpp | 4 ++-- vpr/src/place/net_cost_handler.h | 2 +- vpr/src/route/route_common.cpp | 17 +++++++---------- vpr/src/route/route_utilization.cpp | 24 +++++++++++++----------- vpr/src/route/route_utilization.h | 2 +- 5 files changed, 24 insertions(+), 25 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index b88fbc87dad..ab95a14b7d3 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1855,8 +1855,8 @@ double NetCostHandler::estimate_routing_chan_util(bool 
compute_congestion_cost / return cong_cost; } -std::pair&, const vtr::NdMatrix&> NetCostHandler::get_chanxy_util() const { - return {chan_util_.x, chan_util_.y}; +const ChannelData>& NetCostHandler::get_chan_util() const { + return chan_util_; } void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 05fa6ba1ace..2bb599c19e2 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -152,7 +152,7 @@ class NetCostHandler { */ double estimate_routing_chan_util(bool compute_congestion_cost = true); - std::pair&, const vtr::NdMatrix&> get_chanxy_util() const; + const ChannelData>& get_chan_util() const; private: /// Indicates whether congestion cost modeling is enabled. diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index db8be67b405..521ae9d40e9 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -80,15 +80,13 @@ static bool classes_in_same_block(ParentBlockId blk_id, int first_class_ptc_num, * @param route_opts Contains channel utilization threshold and weighting factor * used to increase initial 'acc_cost' for nodes going through * congested channels. - * @param chanx_util Post-placement estimate of CHANX routing utilization per (layer, x, y) location. - * @param chany_util Post-placement estimate of CHANY routing utilization per (layer, x, y) location. + * @param chan_util Post-placement estimate of routing channel utilization per (layer, x, y) location. * @return Initial `acc_cost` for the given RR node. 
*/ static float comp_initial_acc_cost(RRNodeId node_id, const t_router_opts& route_opts, - const vtr::NdMatrix& chanx_util, - const vtr::NdMatrix& chany_util); + const ChannelData>& chan_util); /************************** Subroutine definitions ***************************/ @@ -436,8 +434,7 @@ void alloc_and_load_rr_node_route_structs(const t_router_opts& router_opts) { static float comp_initial_acc_cost(RRNodeId node_id, const t_router_opts& route_opts, - const vtr::NdMatrix& chanx_util, - const vtr::NdMatrix& chany_util) { + const ChannelData>& chan_util) { const auto& rr_graph = g_vpr_ctx.device().rr_graph; // The default acc_cost is 1 for all rr_nodes. For routing wires, if they pass through a channel @@ -457,7 +454,7 @@ static float comp_initial_acc_cost(RRNodeId node_id, int y = rr_graph.node_ylow(node_id); int layer = rr_graph.node_layer(node_id); for (int x = rr_graph.node_xlow(node_id); x <= rr_graph.node_xhigh(node_id); x++) { - max_util = std::max(max_util, chanx_util[layer][x][y]); + max_util = std::max(max_util, chan_util.x[layer][x][y]); } } else { @@ -465,7 +462,7 @@ static float comp_initial_acc_cost(RRNodeId node_id, int x = rr_graph.node_xlow(node_id); int layer = rr_graph.node_layer(node_id); for (int y = rr_graph.node_ylow(node_id); y <= rr_graph.node_yhigh(node_id); y++) { - max_util = std::max(max_util, chany_util[layer][x][y]); + max_util = std::max(max_util, chan_util.y[layer][x][y]); } } @@ -485,13 +482,13 @@ void reset_rr_node_route_structs(const t_router_opts& route_opts) { VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_graph.num_nodes())); RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry); - const auto [chanx_util, chany_util] = routing_chan_util_estimator.estimate_routing_chan_util(); + const ChannelData> chan_util = routing_chan_util_estimator.estimate_routing_chan_util(); for (const RRNodeId rr_id : device_ctx.rr_graph.nodes()) { t_rr_node_route_inf& node_inf = 
route_ctx.rr_node_route_inf[rr_id]; node_inf.prev_edge = RREdgeId::INVALID(); - node_inf.acc_cost = comp_initial_acc_cost(rr_id, route_opts, chanx_util, chany_util); + node_inf.acc_cost = comp_initial_acc_cost(rr_id, route_opts, chan_util); node_inf.path_cost = std::numeric_limits::infinity(); node_inf.backward_path_cost = std::numeric_limits::infinity(); node_inf.set_occ(0); diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp index 990559269d1..531382352d8 100644 --- a/vpr/src/route/route_utilization.cpp +++ b/vpr/src/route/route_utilization.cpp @@ -13,7 +13,7 @@ RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc net_cost_handler_ = std::make_unique(placer_opts_, *placer_state_, /*cube_bb=*/true); } -std::pair, vtr::NdMatrix> RoutingChanUtilEstimator::estimate_routing_chan_util() { +ChannelData> RoutingChanUtilEstimator::estimate_routing_chan_util() { const auto& clb_nlist = g_vpr_ctx.clustering().clb_nlist; const auto& block_locs = placer_state_->block_locs(); @@ -26,21 +26,23 @@ std::pair, vtr::NdMatrix> RoutingChanUtilEst // Estimate routing channel utilization using net_cost_handler_->estimate_routing_chan_util(/*compute_congestion_cost=*/false); - return net_cost_handler_->get_chanxy_util(); + return net_cost_handler_->get_chan_util(); } else { const auto& device_ctx = g_vpr_ctx.device(); - auto chanx_util = vtr::NdMatrix({{(size_t)device_ctx.grid.get_num_layers(), - device_ctx.grid.width(), - device_ctx.grid.height()}}, - 0); + ChannelData> chan_util; - auto chany_util = vtr::NdMatrix({{(size_t)device_ctx.grid.get_num_layers(), - device_ctx.grid.width(), - device_ctx.grid.height()}}, - 0); + chan_util.x = vtr::NdMatrix({{(size_t)device_ctx.grid.get_num_layers(), + device_ctx.grid.width(), + device_ctx.grid.height()}}, + 0); - return {chanx_util, chany_util}; + chan_util.y = vtr::NdMatrix({{(size_t)device_ctx.grid.get_num_layers(), + device_ctx.grid.width(), + device_ctx.grid.height()}}, + 0); + 
+ return chan_util; } } diff --git a/vpr/src/route/route_utilization.h b/vpr/src/route/route_utilization.h index c23897efe6d..5a89247eaf1 100644 --- a/vpr/src/route/route_utilization.h +++ b/vpr/src/route/route_utilization.h @@ -16,7 +16,7 @@ class RoutingChanUtilEstimator { public: RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc_registry); - std::pair, vtr::NdMatrix> estimate_routing_chan_util(); + ChannelData> estimate_routing_chan_util(); private: std::unique_ptr placer_state_; From 73332ab7a05b0648f63ae5aa0acae8d33f91a4cb Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 19:34:47 -0400 Subject: [PATCH 43/66] doxygen comments for congestion related matrices in NetCostHandler --- vpr/src/base/stats.h | 4 +++- vpr/src/place/net_cost_handler.cpp | 2 +- vpr/src/place/net_cost_handler.h | 24 +++++++++++++++++++++--- vpr/src/route/route_utilization.cpp | 2 +- 4 files changed, 26 insertions(+), 6 deletions(-) diff --git a/vpr/src/base/stats.h b/vpr/src/base/stats.h index 0f73aa13362..1d4f118721a 100644 --- a/vpr/src/base/stats.h +++ b/vpr/src/base/stats.h @@ -28,7 +28,7 @@ void routing_stats(const Netlist<>& net_list, /** * @brief Calculates the routing channel width at each grid location. * - * Iterates through all RR nodes and counts how many wires pass through each (x, y) location + * Iterates through all RR nodes and counts how many wires pass through each (layer, x, y) location * for both horizontal (CHANX) and vertical (CHANY) channels. * * @return A pair of 3D matrices: @@ -73,6 +73,8 @@ void print_device_utilization(const float target_device_utilization); * - Occupancy percentage (occupancy / capacity) * - Channel capacity * + * TODO: extend to 3D + * * @param filename Output file path. * @param occupancy Matrix of occupancy counts. * @param capacity_list List of channel capacities (per y for chanx, per x for chany). 
diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index ab95a14b7d3..7d9e0d5d0f8 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1842,7 +1842,7 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / congestion_modeling_started_ = true; double cong_cost = 0.; - // Compute congestion cost using recomputed bounding boxes and channel utilization map + // Compute congestion cost using computed bounding boxes and channel utilization map if (compute_congestion_cost) { for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 2bb599c19e2..62218b3c448 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -143,15 +143,22 @@ class NetCostHandler { /** * @brief Estimates routing channel utilization and computes the congestion cost * for each net. + * @param compute_congestion_cost Indicates whether computing congestion cost is needed. * * For each net, distributes estimated wirelength across its bounding box * and accumulates demand for different routing channels. Normalizes by channel widths * (e.g. a value of 0.5 means 50% of the wiring in a channel is expected to be used). * - * @return Total congestion cost. + * @note This method assumes that net bounding boxes are already computed. + * + * @return Total congestion cost if requested. */ double estimate_routing_chan_util(bool compute_congestion_cost = true); + /** + * @brief Returns the estimated routing channel usage for each location in the grid. + * The channel usage estimates are computed in estimate_routing_chan_util(). 
+ */ const ChannelData>& get_chan_util() const; private: @@ -268,10 +275,21 @@ class NetCostHandler { */ ChannelData> acc_chan_width_; - ChannelData> acc_chan_util_; - + /** + * @brief Estimated routing usage per channel segment, + * indexed by [layer][x][y]. Values represent normalized wire demand + * contribution from all nets distributed over their bounding boxes. + */ ChannelData> chan_util_; + /** + * @brief Accumulated (prefix sum) channel utilization in each direction (x/y), + * on the base layer. Enables fast computation of average utilization + * over a net’s bounding box during congestion cost estimation. + */ + ChannelData> acc_chan_util_; + + /// Available channel width per grid location, indexed by [layer][x][y]. ChannelData> chan_width_; /** diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp index 531382352d8..2cc40349183 100644 --- a/vpr/src/route/route_utilization.cpp +++ b/vpr/src/route/route_utilization.cpp @@ -23,7 +23,7 @@ ChannelData> RoutingChanUtilEstimator::estimate_routing // Compute net bounding boxes net_cost_handler_->comp_bb_cong_cost(e_cost_methods::NORMAL); - // Estimate routing channel utilization using + // Estimate routing channel usage net_cost_handler_->estimate_routing_chan_util(/*compute_congestion_cost=*/false); return net_cost_handler_->get_chan_util(); From 98f30449dc5c830ed85e22648a0239a8115e5fda Mon Sep 17 00:00:00 2001 From: soheilshahrouz Date: Tue, 8 Jul 2025 19:55:21 -0400 Subject: [PATCH 44/66] rename *chann* to *chan* in NetCostHandler --- vpr/src/place/net_cost_handler.cpp | 14 +++++++------- vpr/src/place/net_cost_handler.h | 4 ++-- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 7d9e0d5d0f8..273c287accb 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -105,10 +105,10 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, if 
(cube_bb_) { ts_bb_edge_new_.resize(num_nets, t_bb()); ts_bb_coord_new_.resize(num_nets, t_bb()); - ts_avg_chann_util_new_.resize(num_nets, {0., 0.}); + ts_avg_chan_util_new_.resize(num_nets, {0., 0.}); bb_coords_.resize(num_nets, t_bb()); - avg_chann_util_.resize(num_nets, {0., 0.}); + avg_chan_util_.resize(num_nets, {0., 0.}); bb_num_on_edges_.resize(num_nets, t_bb()); comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1); @@ -563,7 +563,7 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts } if (congestion_modeling_started_) { - auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; + auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id]; const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; @@ -876,7 +876,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, } if (congestion_modeling_started_) { - auto& [x_chan_util, y_chan_util] = ts_avg_chann_util_new_[net_id]; + auto& [x_chan_util, y_chan_util] = ts_avg_chan_util_new_[net_id]; const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); x_chan_util = acc_chan_util_.x.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; y_chan_util = acc_chan_util_.y.get_sum(bb_coord_new.xmin, bb_coord_new.ymin, bb_coord_new.xmax, bb_coord_new.ymax) / total_channels; @@ -1317,7 +1317,7 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) { num_on_edges.layer_max = layer_max_edge; if 
(congestion_modeling_started_) { - auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; + auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id]; const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1); x_chan_util = acc_chan_util_.x.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; y_chan_util = acc_chan_util_.y.get_sum(coords.xmin, coords.ymin, coords.xmax, coords.ymax) / total_channels; @@ -1418,7 +1418,7 @@ double NetCostHandler::get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts) { double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) { VTR_ASSERT_SAFE(congestion_modeling_started_); - const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chann_util_new_[net_id] : avg_chann_util_[net_id]; + const auto [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id]; const float threshold = placer_opts_.congestion_chan_util_threshold; @@ -1862,7 +1862,7 @@ const ChannelData>& NetCostHandler::get_chan_util() con void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { if (cube_bb_) { bb_coords_[net_id] = ts_bb_coord_new_[net_id]; - avg_chann_util_[net_id] = ts_avg_chann_util_new_[net_id]; + avg_chan_util_[net_id] = ts_avg_chan_util_new_[net_id]; } else { layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id]; } diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 62218b3c448..3537182c227 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -214,7 +214,7 @@ class NetCostHandler { /* [0...num_affected_nets] -> net_id of the affected nets */ std::vector ts_nets_to_update_; - vtr::vector> ts_avg_chann_util_new_; + vtr::vector> ts_avg_chan_util_new_; /// Store the number of blocks on each of a net's bounding box (to allow efficient updates) /// 
[0..cluster_ctx.clb_nlist.nets().size()-1] @@ -224,7 +224,7 @@ class NetCostHandler { /// [0..cluster_ctx.clb_nlist.nets().size()-1] vtr::vector bb_coords_; - vtr::vector> avg_chann_util_; + vtr::vector> avg_chan_util_; /// Store the number of blocks on each of a net's bounding box (to allow efficient updates) /// [0..cluster_ctx.clb_nlist.nets().size()-1] From 8bedcd92cabaa9f4314a395beb494cc3f9674786 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Thu, 10 Jul 2025 10:43:19 -0400 Subject: [PATCH 45/66] update comments in NetCostHandler to document return types --- vpr/src/place/net_cost_handler.h | 39 +++++++++++++++++++++----------- 1 file changed, 26 insertions(+), 13 deletions(-) diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 3537182c227..454bc81be65 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -62,16 +62,16 @@ class NetCostHandler { NetCostHandler(const t_placer_opts& placer_opts, PlacerState& placer_state, bool cube_bb); /** - * @brief Finds the bb cost from scratch. - * Done only when the placement has been radically changed - * (i.e. after initial placement). Otherwise find the cost + * @brief Finds the bb cost and congestion cost from scratch. + * @details Done only when the placement has been radically changed + * (i.e. after initial placement). Otherwise, find the cost * change incrementally. If method check is NORMAL, we find * bounding boxes that are updatable for the larger nets. * If method is CHECK, all bounding boxes are found via the * non_updateable_bb routine, to provide a cost which can be * used to check the correctness of the other routine. * @param method The method used to calculate placement cost. 
- * @return (bounding box cost of the placement, estimated wirelength) + * @return (bounding box cost of the placement, estimated wirelength, congestion cost) * * @note The returned estimated wirelength is valid only when method == CHECK */ @@ -172,7 +172,7 @@ class NetCostHandler { PlacerState& placer_state_; /// Contains some parameter that determine how the placement cost is computed. const t_placer_opts& placer_opts_; - /// Points to the proper method for computing the bounding box cost from scratch. + /// Points to the proper method for computing the bounding box cost, estimated wirelength and congestion cost from scratch. std::function(e_cost_methods method)> comp_bb_cong_cost_functor_; /// Points to the proper method for updating the bounding box of a net. std::function update_bb_functor_; @@ -259,6 +259,15 @@ class NetCostHandler { vtr::vector net_cost_; vtr::vector proposed_net_cost_; + /** + * @brief The congestion cost for each net is based on the extent to which its + * average routing channel utilization exceeds a predefined threshold. + * This is computed by measuring the average utilization within the net's + * bounding box and subtracting the congestion threshold. + * Only the excess portion contributes to the net's congestion cost. + * The valid range is [0...cluster_ctx.clb_nlist.nets().size()-1] when + * congestion modeling is enabled. Otherwise, this vector would be empty. + */ vtr::vector net_cong_cost_; vtr::vector proposed_net_cong_cost_; @@ -335,7 +344,8 @@ class NetCostHandler { /** * @brief Calculates and returns the total bb (wirelength) cost change that would result from moving the blocks * indicated in the blocks_affected data structure. - * @param bb_delta_c Cost difference after and before moving the block + * @param bb_delta_c Bounding box cost difference after and before moving the block. + * @param congestion_delta_c Congestion cost difference after and before moving the block. 
*/ void set_bb_delta_cost_(double& bb_delta_c, double& congestion_delta_c); @@ -352,7 +362,7 @@ class NetCostHandler { /** * @brief Allocates and loads acc_tile_num_inter_die_conn_ which contains the accumulative number of inter-die - * conntections. + * connections. * * @details This is only useful for multi-die FPGAs. */ @@ -544,17 +554,18 @@ class NetCostHandler { * @brief Computes the bounding box from scratch using 2D bounding boxes (per-layer mode) * @param method The method used to calculate placement cost. Specifies whether the cost is * computed from scratch or incrementally. - * @return (bounding box cost of the placement, estimated wirelength) - * + * @return (bounding box cost of the placement, estimated wirelength, congestion cost) + * @note Congestion modeling is not supported for per-layer mode, so 0 is returned. * @note The returned estimated wirelength is valid only when method == CHECK */ std::tuple comp_per_layer_bb_cost_(e_cost_methods method); /** * @brief Computes the bounding box from scratch using 3D bounding boxes (cube mode) + * and calculates BB cost, estimated wirelength, and congestion cost (if enabled). * @param method The method used to calculate placement cost. Specifies whether the cost is - * computed from scratch or incrementally. - * @return (bounding box cost of the placement, estimated wirelength) + * computed from scratch or incrementally. + * @return (bounding box cost of the placement, estimated wirelength, congestion cost) * * @note The returned estimated wirelength is valid only when method == CHECK */ @@ -568,8 +579,10 @@ class NetCostHandler { /** * @brief To mitigate round-off errors, every once in a while, the costs of nets are summed up from scratch. - * This functions is called to do that for bb cost. It doesn't calculate the BBs from scratch, it would only add the costs again. - * @return Total bb (wirelength) cost for the placement + * This function is called to do that for bb and congestion cost. 
+ * It doesn't calculate the BBs or channel usage estimate from scratch, + * it would only add the costs again. + * @return (total bb cost, total congestion cost) */ std::pair recompute_bb_cong_cost_(); From 2b4aa2df7992902c0578e7ee57bc658c2a057efa Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Thu, 10 Jul 2025 11:00:35 -0400 Subject: [PATCH 46/66] make format --- vpr/src/place/net_cost_handler.cpp | 10 +++++----- vpr/src/place/net_cost_handler.h | 22 +++++++++++----------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 273c287accb..60c613783c1 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1424,7 +1424,7 @@ double NetCostHandler::get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts) float x_chan_cong = (x_chan_util < threshold) ? 0.0f : x_chan_util - threshold; float y_chan_cong = (y_chan_util < threshold) ? 0.0f : y_chan_util - threshold; - + return x_chan_cong + y_chan_cong; } @@ -1834,10 +1834,10 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / }); acc_chan_util_.y = vtr::PrefixSum2D(grid_width, - grid_height, - [&](size_t x, size_t y) { - return chan_util_.y[0][x][y]; - }); + grid_height, + [&](size_t x, size_t y) { + return chan_util_.y[0][x][y]; + }); congestion_modeling_started_ = true; diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 454bc81be65..9d1714bfd3c 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -282,24 +282,24 @@ class NetCostHandler { * number of tracks in that direction; for other cost functions they * will never be used. */ - ChannelData> acc_chan_width_; + ChannelData> acc_chan_width_; - /** + /** * @brief Estimated routing usage per channel segment, * indexed by [layer][x][y]. 
Values represent normalized wire demand * contribution from all nets distributed over their bounding boxes. */ - ChannelData> chan_util_; + ChannelData> chan_util_; - /** - * @brief Accumulated (prefix sum) channel utilization in each direction (x/y), - * on the base layer. Enables fast computation of average utilization - * over a net’s bounding box during congestion cost estimation. - */ - ChannelData> acc_chan_util_; + /** + * @brief Accumulated (prefix sum) channel utilization in each direction (x/y), + * on the base layer. Enables fast computation of average utilization + * over a net’s bounding box during congestion cost estimation. + */ + ChannelData> acc_chan_util_; - /// Available channel width per grid location, indexed by [layer][x][y]. - ChannelData> chan_width_; + /// Available channel width per grid location, indexed by [layer][x][y]. + ChannelData> chan_width_; /** * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in From 4da31eeda423e807a3295c0b17d2cf4b1120f8c0 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Thu, 10 Jul 2025 11:18:30 -0400 Subject: [PATCH 47/66] lazy allocation of congestion-related data members --- vpr/src/place/net_cost_handler.cpp | 60 ++++++++++++++++++------------ vpr/src/place/net_cost_handler.h | 13 +++++-- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 60c613783c1..abaf066eae3 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -94,8 +94,6 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, , placer_opts_(placer_opts) { const auto& device_ctx = g_vpr_ctx.device(); - const size_t grid_width = device_ctx.grid.width(); - const size_t grid_height = device_ctx.grid.height(); const size_t num_layers = device_ctx.grid.get_num_layers(); const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); @@ -105,10 +103,8 
@@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, if (cube_bb_) { ts_bb_edge_new_.resize(num_nets, t_bb()); ts_bb_coord_new_.resize(num_nets, t_bb()); - ts_avg_chan_util_new_.resize(num_nets, {0., 0.}); bb_coords_.resize(num_nets, t_bb()); - avg_chan_util_.resize(num_nets, {0., 0.}); bb_num_on_edges_.resize(num_nets, t_bb()); comp_bb_cong_cost_functor_ = std::bind(&NetCostHandler::comp_cube_bb_cong_cost_, this, std::placeholders::_1); @@ -137,8 +133,6 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, // negative net costs mean the cost is not valid. net_cost_.resize(num_nets, -1.); proposed_net_cost_.resize(num_nets, -1.); - net_cong_cost_.resize(num_nets, -1.); - proposed_net_cong_cost_.resize(num_nets, -1.); // Used to store costs for moves not yet made and to indicate when a net's // cost has been recomputed. proposed_net_cost[inet] < 0 means net's cost hasn't @@ -147,20 +141,14 @@ NetCostHandler::NetCostHandler(const t_placer_opts& placer_opts, alloc_and_load_chan_w_factors_for_place_cost_(); - chan_util_.x = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); - chan_util_.y = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); - - acc_chan_util_.x = vtr::PrefixSum2D(grid_width, - grid_height, - [&](size_t x, size_t y) { - return chan_util_.x[0][x][y]; - }); - - acc_chan_util_.y = vtr::PrefixSum2D(grid_width, - grid_height, - [&](size_t x, size_t y) { - return chan_util_.y[0][x][y]; - }); + // Congestion-related data members are not allocated until congestion modeling is enabled + // by calling estimate_routing_chan_util(). 
+ VTR_ASSERT(!congestion_modeling_started_); + VTR_ASSERT(chan_util_.x.empty() && chan_util_.y.empty()); + VTR_ASSERT(acc_chan_util_.x.empty() && acc_chan_util_.y.empty()); + VTR_ASSERT(ts_avg_chan_util_new_.empty()); + VTR_ASSERT(avg_chan_util_.empty()); + VTR_ASSERT(net_cong_cost_.empty() && proposed_net_cong_cost_.empty()); } void NetCostHandler::alloc_and_load_chan_w_factors_for_place_cost_() { @@ -1718,13 +1706,41 @@ double NetCostHandler::get_total_wirelength_estimate() const { return estimated_wirelength; } -double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /* = true*/) { +double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost /*=true*/) { const auto& cluster_ctx = g_vpr_ctx.clustering(); const DeviceContext& device_ctx = g_vpr_ctx.device(); const size_t grid_width = device_ctx.grid.width(); const size_t grid_height = device_ctx.grid.height(); const size_t num_layers = device_ctx.grid.get_num_layers(); + const size_t num_nets = g_vpr_ctx.clustering().clb_nlist.nets().size(); + + // Congestion-related data members are allocated the first time this method is called + // to enable congestion modeling. This lazy allocation helps save memory when congestion + // modeling is not used. 
+ if (!congestion_modeling_started_) { + congestion_modeling_started_ = true; + + chan_util_.x = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); + chan_util_.y = vtr::NdMatrix({{num_layers, grid_width, grid_height}}, 0); + + acc_chan_util_.x = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.x[0][x][y]; + }); + + acc_chan_util_.y = vtr::PrefixSum2D(grid_width, + grid_height, + [&](size_t x, size_t y) { + return chan_util_.y[0][x][y]; + }); + + ts_avg_chan_util_new_.resize(num_nets, {0., 0.}); + avg_chan_util_.resize(num_nets, {0., 0.}); + net_cong_cost_.resize(num_nets, -1.); + proposed_net_cong_cost_.resize(num_nets, -1.); + } chan_util_.x.fill(0.); chan_util_.y.fill(0.); @@ -1839,8 +1855,6 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / return chan_util_.y[0][x][y]; }); - congestion_modeling_started_ = true; - double cong_cost = 0.; // Compute congestion cost using computed bounding boxes and channel utilization map if (compute_congestion_cost) { diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index 9d1714bfd3c..bbe1131e49b 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -553,7 +553,7 @@ class NetCostHandler { /** * @brief Computes the bounding box from scratch using 2D bounding boxes (per-layer mode) * @param method The method used to calculate placement cost. Specifies whether the cost is - * computed from scratch or incrementally. + * computed from scratch or incrementally. * @return (bounding box cost of the placement, estimated wirelength, congestion cost) * @note Congestion modeling is not supported for per-layer mode, so 0 is returned. * @note The returned estimated wirelength is valid only when method == CHECK @@ -588,13 +588,20 @@ class NetCostHandler { /** * @brief Given the 3D BB, calculate the wire-length cost of the net - * @param net_id ID of the net which cost is requested. 
+ * @param net_id ID of the net whose cost is requested. * @param use_ts Specifies if the bounding box is retrieved from ts data structures - * or move context. + * or permanent data structures. * @return Wirelength cost of the net */ double get_net_cube_bb_cost_(ClusterNetId net_id, bool use_ts); + /** + * @brief Calculate the congestion cost of net using its 3D bounding box. + * @param net_id ID of the net whose cost is requested. + * @param use_ts Specifies if the bounding box is retrieved from ts data structures + * or move context. + * @return Congestion cost of the net + */ double get_net_cube_cong_cost_(ClusterNetId net_id, bool use_ts); /** From 1f77a455ae3f07f470984deaa28fccf62f0941ad Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Thu, 10 Jul 2025 11:48:18 -0400 Subject: [PATCH 48/66] add help messages for congestion cmd options && update the comment and condition for enabling congestion modeling in the annealer --- vpr/src/base/read_options.cpp | 20 ++++++++++++-------- vpr/src/place/annealer.cpp | 10 ++++++++-- 2 files changed, 20 insertions(+), 10 deletions(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index b631145c5df..cae2d51651f 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -93,6 +93,7 @@ struct ParseArchFormat { return {"vtr", "fpga-interchange"}; } }; + struct ParseCircuitFormat { ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; @@ -2334,9 +2335,9 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio "Specifies the type of bounding box to be used in 3D architectures.\n" "\n" "MODE options:\n" - " auto_bb : Automatically determine the appropriate bounding box based on the connections between layers.\n" - " cube_bb : Use 3D bounding boxes.\n" - " per_layer_bb : Use per-layer bounding boxes.\n" + " auto_bb : Automatically determine the appropriate bounding box based on the connections between layers.\n" + " cube_bb : Use 
3D bounding boxes.\n" + " per_layer_bb : Use per-layer bounding boxes.\n" "\n" "Choose one of the available modes to define the behavior of bounding boxes in your 3D architecture. The default mode is 'automatic'.") .default_value("auto_bb") @@ -2490,18 +2491,21 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor") - .help("To be written") + .help("Weighting factor for congestion cost during placement. " + "Higher values prioritize congestion avoidance over bounding box and timing costs. " + "When set to zero, congestion modeling and optimization is disabled in the placement stage.") .default_value("0.0") .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.place_congestion_rlim_trigger_ratio, "--congestion_rlim_trigger_ratio") - .help("To be written") - .default_value("0.0") + .help("Enables congestion modeling when the ratio of the current range limit to the initial range limit falls below this threshold, " + "provided the congestion weighting factor is non-zero.") + .default_value("1.0") .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.place_congestion_chan_util_threshold, "--congestion_chan_util_threshold") - .help("To be written") - .default_value("1.0") + .help("Penalizes nets in placement whose average routing channel utilization within their bounding boxes exceeds this threshold.") + .default_value("0.5") .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter") diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index 845b109cbb5..fd07ef65bf2 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -679,8 +679,14 @@ void PlacementAnnealer::outer_loop_update_timing_info() { outer_crit_iter_count_++; } - if (congestion_modeling_started_ - || (annealing_state_.rlim / 
MoveGenerator::first_rlim) < placer_opts_.congestion_rlim_trigger_ratio) { + + // Congestion modeling is enabled when the ratio of the current range limit to the initial range limit + // drops below a user-specified threshold, and the congestion cost weighting factor is non-zero. + // Once enabled, congestion modeling continues even if the range limit increases and the ratio + // rises above the threshold. + if ((annealing_state_.rlim / MoveGenerator::first_rlim < placer_opts_.congestion_rlim_trigger_ratio + && placer_opts_.congestion_factor != 0.) + || congestion_modeling_started_) { costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util(); if (!congestion_modeling_started_) { From 34d801c362c3491330a33189307b91e8f8e75edb Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Thu, 10 Jul 2025 11:55:13 -0400 Subject: [PATCH 49/66] clean doxygen comments in NetCostHandler by removing @params that no longer exist --- vpr/src/place/net_cost_handler.cpp | 8 ++++---- vpr/src/place/net_cost_handler.h | 26 ++++++++++---------------- 2 files changed, 14 insertions(+), 20 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index abaf066eae3..1e58b18aee0 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -303,10 +303,10 @@ std::tuple NetCostHandler::comp_per_layer_bb_cost_(e_cos // TODO: compute congestion cost constexpr double cong_cost = 0.; - for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { /* for each net ... */ - if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { /* Do only if not ignored. */ - /* Small nets don't use incremental updating on their bounding boxes, * - * so they can use a fast bounding box calculator. */ + for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { + if (!cluster_ctx.clb_nlist.net_is_ignored(net_id)) { + // Small nets don't use incremental updating on their bounding boxes, + //so they can use a fast bounding box calculator. 
if (cluster_ctx.clb_nlist.net_sinks(net_id).size() >= SMALL_NET && method == e_cost_methods::NORMAL) { get_layer_bb_from_scratch_(net_id, layer_bb_num_on_edges_[net_id], diff --git a/vpr/src/place/net_cost_handler.h b/vpr/src/place/net_cost_handler.h index bbe1131e49b..8f9d4511e1d 100644 --- a/vpr/src/place/net_cost_handler.h +++ b/vpr/src/place/net_cost_handler.h @@ -115,7 +115,6 @@ class NetCostHandler { /** * @brief Update net cost data structures (in placer context and net_cost in .cpp file) * and reset flags (proposed_net_cost and bb_updated_before). - * @param num_nets_affected The number of nets affected by the move. * It is used to determine the index up to which elements in ts_nets_to_update are valid. */ void update_move_nets(); @@ -125,7 +124,6 @@ class NetCostHandler { * and update "costs" accordingly. It is important to note that in this function bounding box * and connection delays are not calculated from scratch. However, it iterates over all nets * and connections and updates their costs by a complete summation, rather than incrementally. - * @param noc_opts Contains NoC cost weighting factors. * @param delay_model Placement delay model. Used to compute timing cost. * @param criticalities Contains the clustered netlist connection criticalities. * Used to computed timing cost . @@ -305,7 +303,7 @@ class NetCostHandler { * @brief The matrix below is used to calculate a chanz_place_cost_fac based on the average channel width in * the cross-die-layer direction over a 2D (x,y) region. We don't assume the inter-die connectivity is the same at all (x,y) locations, so we * can't compute the full chanz_place_cost_fac for all possible (xlow,ylow)(xhigh,yhigh) without a 4D array, which would - * be too big: O(n^2) in circuit size. Instead we compute a prefix sum that stores the number of inter-die connections per layer from + * be too big: O(n^2) in circuit size. 
Instead, we compute a prefix sum that stores the number of inter-die connections per layer from * (x=0,y=0) to (x,y). Given this, we can compute the average number of inter-die connections over a (xlow,ylow) to (xhigh,yhigh) * region in O(1) (by adding and subtracting 4 entries) */ @@ -435,10 +433,10 @@ class NetCostHandler { * @details This routine finds the bounding box of each net from scratch when the bounding box is of type per-layer (i.e. from * only the block location information). It updates the coordinate, number of pins on each edge information, and the * number of sinks on each layer. It should only be called when the bounding box information is not valid. - * @param net_id ID of the net which the moving pin belongs to - * @param coords Bounding box coordinates of the net. It is calculated in this function - * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function. - * @param num_sink_pin_layer Net's number of sinks on each layer, calculated in this function. + * @param net_id ID of the net which the moving pin belongs to + * @param coords Bounding box coordinates of the net. It is calculated in this function + * @param num_on_edges Net's number of blocks on the edges of the bounding box. It is calculated in this function. + * @param layer_pin_sink_count Net's number of sinks on each layer, calculated in this function. */ void get_layer_bb_from_scratch_(ClusterNetId net_id, std::vector& num_on_edges, @@ -473,11 +471,9 @@ class NetCostHandler { * Currently assumes channels on both sides of the CLBs forming the edges of the bounding box can be used. * Essentially, I am assuming the pins always lie on the outside of the bounding box. The x and y coordinates * are the pin's x and y coordinates. IO blocks are considered to be one cell in for simplicity. 
- * @param bb_edge_new Number of blocks on the edges of the bounding box - * @param bb_coord_new Coordinates of the bounding box - * @param num_sink_pin_layer_new Number of sinks of the given net on each layer - * @param pin_old_loc The old location of the moving pin - * @param pin_new_loc The new location of the moving pin + * @param net_id Net whose bounding box is to be updated. + * @param pin_old_loc The old location of the moving pin + * @param pin_new_loc The new location of the moving pin * @param is_output_pin Is the moving pin of the type output */ void update_layer_bb_(ClusterNetId net_id, @@ -650,16 +646,14 @@ class NetCostHandler { /** * @brief Given the 3D BB, calculate the wire-length estimate of the net * @param net_id ID of the net which wirelength estimate is requested - * @param bb Bounding box of the net * @return Wirelength estimate of the net */ double get_net_wirelength_estimate_(ClusterNetId net_id) const; /** * @brief Given the per-layer BB, calculate the wire-length estimate of the net on each layer - * and return the sum of the lengths - * @param bb Per-layer BB of the net - * @param net_layer_pin_sink_count Number of sink pins on each layer for the net + * and return the sum of the lengths + * @param net_id Net whose wirelength is to be estimated. 
* @return Wirelength estimate of the net */ double get_net_wirelength_from_layer_bb_(ClusterNetId net_id) const; From e043a1fdca99eb5fe46490c1be5c7b1146f71c6e Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Thu, 10 Jul 2025 12:05:58 -0400 Subject: [PATCH 50/66] fix segfault by guarding access to avg_chan_util_ --- vpr/src/place/net_cost_handler.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 1e58b18aee0..923361668ec 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -1876,7 +1876,9 @@ const ChannelData>& NetCostHandler::get_chan_util() con void NetCostHandler::set_ts_bb_coord_(const ClusterNetId net_id) { if (cube_bb_) { bb_coords_[net_id] = ts_bb_coord_new_[net_id]; - avg_chan_util_[net_id] = ts_avg_chan_util_new_[net_id]; + if (congestion_modeling_started_) { + avg_chan_util_[net_id] = ts_avg_chan_util_new_[net_id]; + } } else { layer_bb_coords_[net_id] = layer_ts_bb_coord_new_[net_id]; } From a4e318eb17053ea9d9db6b239108d981b1d20b4b Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Thu, 10 Jul 2025 12:14:02 -0400 Subject: [PATCH 51/66] explain the logic behind not starting congestion modeling early in the anneal --- vpr/src/place/annealer.cpp | 10 +++++++--- vpr/src/place/net_cost_handler.cpp | 1 + 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/vpr/src/place/annealer.cpp b/vpr/src/place/annealer.cpp index fd07ef65bf2..ece401daa80 100644 --- a/vpr/src/place/annealer.cpp +++ b/vpr/src/place/annealer.cpp @@ -679,14 +679,18 @@ void PlacementAnnealer::outer_loop_update_timing_info() { outer_crit_iter_count_++; } - // Congestion modeling is enabled when the ratio of the current range limit to the initial range limit // drops below a user-specified threshold, and the congestion cost weighting factor is non-zero. 
// Once enabled, congestion modeling continues even if the range limit increases and the ratio // rises above the threshold. + // + // This logic is motivated by the observation that enabling congestion modeling too early in the + // anneal increases computational overhead and introduces noise into the placement cost function, + // as early placements are typically highly congested and unstable. So, we delay congestion modeling + // until the placement is more settled and wirelength has been reasonably optimized. if ((annealing_state_.rlim / MoveGenerator::first_rlim < placer_opts_.congestion_rlim_trigger_ratio - && placer_opts_.congestion_factor != 0.) - || congestion_modeling_started_) { + && placer_opts_.congestion_factor != 0.) + || congestion_modeling_started_) { costs_.congestion_cost = net_cost_handler_.estimate_routing_chan_util(); if (!congestion_modeling_started_) { diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 923361668ec..62514aa8670 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -301,6 +301,7 @@ std::tuple NetCostHandler::comp_per_layer_bb_cost_(e_cos double cost = 0.; double expected_wirelength = 0.; // TODO: compute congestion cost + // Congestion modeling is not supported for per-layer mode, so 0 is returned. 
constexpr double cong_cost = 0.; for (ClusterNetId net_id : cluster_ctx.clb_nlist.nets()) { From d2974d86941f1214f9a94224727a8ef2644af7c5 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 14 Jul 2025 13:18:53 -0400 Subject: [PATCH 52/66] add a high-level comment to explain how congestion is modeled --- vpr/src/place/net_cost_handler.cpp | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 62514aa8670..5dcda72b474 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -2,11 +2,11 @@ * @file net_cost_handler.cpp * @brief This file contains the implementation of functions used to update placement cost when a new move is proposed/committed. * - * VPR placement cost consists of three terms which represent wirelength, timing, and NoC cost. + * VPR placement cost consists of multiple terms which represent wirelength, timing, congestion, and NoC cost. * - * To get an estimation of the wirelength of each net, the Half Perimeter Wire Length (HPWL) approach is used. In this approach, + * To get an estimation of the wirelength of each net, the Half Perimeter Wire Length (HPWL) metric is used. In this approach, * half of the perimeter of the bounding box which contains all terminals of the net is multiplied by a correction factor, - * and the resulting number is considered as an estimation of the bounding box. + * and the resulting number is considered as an estimation of the wirelength needed to route this net. * * Currently, we have two types of bounding boxes: 3D bounding box (or Cube BB) and per-layer bounding box. * If the FPGA grid is a 2D structure, a Cube bounding box is used, which will always have the z direction equal to 1. 
For 3D architectures, @@ -20,6 +20,17 @@ * To get a delay estimation of a connection (from a source to a sink), first, dx and dy between these two points should be calculated, * and these two numbers are the indices to access this 2D array. By default, the placement delay model is created by iterating over the router lookahead * to get the minimum cost for each dx and dy. + * + * For congestion modeling, we periodically estimate routing channel usage by distributing the estimated + * wirelength (WL) of each net across all routing channels within its bounding box. The wirelength is divided + * between CHANX and CHANY in proportion to the bounding box's width and height, respectively. However, all + * routing channels of the same type (CHANX or CHANY) within the box receive an equal share of that net's WL. + * + * We compute a congestion cost for each net by averaging the estimated utilization over all CHANX and CHANY + * channels in its bounding box. These average utilizations are then compared to a user-specified threshold. + * If a net’s average utilization exceeds the threshold, the excess is penalized by adding a cost proportional + * to the amount of the exceedance. + * */ #include "net_cost_handler.h" @@ -551,6 +562,7 @@ void NetCostHandler::get_non_updatable_cube_bb_(ClusterNetId net_id, bool use_ts num_sink_pin_layer[pin_loc.layer_num]++; } + // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled if (congestion_modeling_started_) { auto& [x_chan_util, y_chan_util] = use_ts ? 
ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id]; const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); @@ -864,6 +876,7 @@ void NetCostHandler::update_bb_(ClusterNetId net_id, bb_update_status_[net_id] = NetUpdateState::UPDATED_ONCE; } + // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled if (congestion_modeling_started_) { auto& [x_chan_util, y_chan_util] = ts_avg_chan_util_new_[net_id]; const int total_channels = (bb_coord_new.xmax - bb_coord_new.xmin + 1) * (bb_coord_new.ymax - bb_coord_new.ymin + 1); @@ -1305,6 +1318,7 @@ void NetCostHandler::get_bb_from_scratch_(ClusterNetId net_id, bool use_ts) { num_on_edges.layer_min = layer_min_edge; num_on_edges.layer_max = layer_max_edge; + // Update average CHANX and CHANY usage for this net within its bounding box if congestion modeling is enabled if (congestion_modeling_started_) { auto& [x_chan_util, y_chan_util] = use_ts ? ts_avg_chan_util_new_[net_id] : avg_chan_util_[net_id]; const int total_channels = (coords.xmax - coords.xmin + 1) * (coords.ymax - coords.ymin + 1); @@ -1584,12 +1598,12 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel* ts_nets_to_update_.resize(0); - /* Go through all the blocks moved. */ + // Go through all the blocks moved. for (const t_pl_moved_block& moving_block : blocks_affected.moved_blocks) { auto& affected_pins = blocks_affected.affected_pins; ClusterBlockId blk_id = moving_block.block_num; - /* Go through all the pins in the moved block. */ + // Go through all the pins in the moved block. for (ClusterPinId blk_pin : clb_nlist.block_pins(blk_id)) { bool is_src_moving = false; if (clb_nlist.pin_type(blk_pin) == PinType::SINK) { @@ -1606,13 +1620,13 @@ void NetCostHandler::find_affected_nets_and_update_costs(const PlaceDelayModel* } } - /* Now update the bounding box costs (since the net bounding * - * boxes are up-to-date). 
The cost is only updated once per net. */ + // Now update the bounding box costs (since the net bounding + // boxes are up-to-date). The cost is only updated once per net. set_bb_delta_cost_(bb_delta_c, congestion_delta_c); } void NetCostHandler::update_move_nets() { - /* update net cost functions and reset flags. */ + // update net cost functions and reset flags. const auto& cluster_ctx = g_vpr_ctx.clustering(); for (const ClusterNetId ts_net : ts_nets_to_update_) { From f71d778e222e1cccc248de45ce7772614feef9f1 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 14 Jul 2025 13:28:11 -0400 Subject: [PATCH 53/66] add comments for non-existing channels and cube_bb assumption --- vpr/src/place/net_cost_handler.cpp | 4 +++- vpr/src/route/route_common.cpp | 2 +- vpr/src/route/route_utilization.cpp | 1 + 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/vpr/src/place/net_cost_handler.cpp b/vpr/src/place/net_cost_handler.cpp index 5dcda72b474..77247f1510b 100644 --- a/vpr/src/place/net_cost_handler.cpp +++ b/vpr/src/place/net_cost_handler.cpp @@ -30,7 +30,6 @@ * channels in its bounding box. These average utilizations are then compared to a user-specified threshold. * If a net’s average utilization exceeds the threshold, the excess is penalized by adding a cost proportional * to the amount of the exceedance. - * */ #include "net_cost_handler.h" @@ -1836,6 +1835,9 @@ double NetCostHandler::estimate_routing_chan_util(bool compute_congestion_cost / VTR_ASSERT(chan_util_.x.size() == chan_width_.x.size()); VTR_ASSERT(chan_util_.y.size() == chan_width_.y.size()); + // Normalize channel utilizations by dividing by the corresponding channel widths. + // If a channel does not exist (i.e., its width is zero), we set its utilization to 1 + // to avoid division by zero. 
for (size_t layer = 0; layer < num_layers; ++layer) { for (size_t x = 0; x < grid_width; ++x) { for (size_t y = 0; y < grid_height; ++y) { diff --git a/vpr/src/route/route_common.cpp b/vpr/src/route/route_common.cpp index 521ae9d40e9..b5785cc5fc8 100644 --- a/vpr/src/route/route_common.cpp +++ b/vpr/src/route/route_common.cpp @@ -83,7 +83,6 @@ static bool classes_in_same_block(ParentBlockId blk_id, int first_class_ptc_num, * @param chan_util Post-placement estimate of routing channel utilization per (layer, x, y) location. * @return Initial `acc_cost` for the given RR node. */ - static float comp_initial_acc_cost(RRNodeId node_id, const t_router_opts& route_opts, const ChannelData>& chan_util); @@ -481,6 +480,7 @@ void reset_rr_node_route_structs(const t_router_opts& route_opts) { VTR_ASSERT(route_ctx.rr_node_route_inf.size() == size_t(device_ctx.rr_graph.num_nodes())); + // RoutingChanUtilEstimator assumes cube bounding box RoutingChanUtilEstimator routing_chan_util_estimator(blk_loc_registry); const ChannelData> chan_util = routing_chan_util_estimator.estimate_routing_chan_util(); diff --git a/vpr/src/route/route_utilization.cpp b/vpr/src/route/route_utilization.cpp index 2cc40349183..b8cb5b4c5df 100644 --- a/vpr/src/route/route_utilization.cpp +++ b/vpr/src/route/route_utilization.cpp @@ -10,6 +10,7 @@ RoutingChanUtilEstimator::RoutingChanUtilEstimator(const BlkLocRegistry& blk_loc placer_state_->mutable_blk_loc_registry() = blk_loc_registry; placer_opts_.place_algorithm = e_place_algorithm::BOUNDING_BOX_PLACE; + /// RoutingChanUtilEstimator uses cube bounding box net_cost_handler_ = std::make_unique(placer_opts_, *placer_state_, /*cube_bb=*/true); } From 94cc21948329f78c9cd2e48caff71b1cd4397a95 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 14 Jul 2025 13:41:52 -0400 Subject: [PATCH 54/66] clean setup_vpr --- vpr/src/base/setup_vpr.cpp | 110 ++++++++++++++++++------------------- vpr/src/base/setup_vpr.h | 6 ++ 2 files changed, 59 insertions(+), 57 
deletions(-) diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index 6c04ae80056..b792ac4bf12 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -27,23 +27,61 @@ #include "setup_vib_utils.h" static void setup_netlist_opts(const t_options& Options, t_netlist_opts& NetlistOpts); + +/** + * @brief Sets up the t_ap_opts structure based on users inputs and + * on the architecture specified. + * + * Error checking, such as checking for conflicting params is assumed + * to be done beforehand + */ static void setup_ap_opts(const t_options& options, t_ap_opts& apOpts); + +/** + * @brief Sets up the t_packer_opts structure based on users inputs and + * on the architecture specified. + * + * Error checking, such as checking for conflicting params is assumed + * to be done beforehand + */ static void setup_packer_opts(const t_options& Options, t_packer_opts* PackerOpts); + +/** + * @brief Sets up the s_placer_opts structure based on users input. + * + * Error checking, such as checking for conflicting params + * is assumed to be done beforehand + */ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpts); static void setup_anneal_sched(const t_options& Options, t_annealing_sched* AnnealSched); static void setup_router_opts(const t_options& Options, t_router_opts* RouterOpts); + +/** + * Go through all the NoC options supplied by the user and store them internally. + */ static void setup_noc_opts(const t_options& Options, t_noc_opts* NocOpts); + static void setup_server_opts(const t_options& Options, t_server_opts* ServerOpts); +/** + * @brief Sets up routing structures. 
+ * + * Since checks are already done, this just copies values across + */ static void setup_routing_arch(const t_arch& Arch, t_det_routing_arch& RoutingArch); static void setup_timing(const t_options& Options, const bool TimingEnabled, t_timing_inf* Timing); + +/** + * @brief This loads up VPR's arch_switch_inf data by combining the switches + * from the arch file with the special switches that VPR needs. + */ static void setup_switches(const t_arch& Arch, t_det_routing_arch& RoutingArch, const std::vector& arch_switches); @@ -72,22 +110,12 @@ static void add_intra_tile_switches(); /** * Identify the pins that can directly reach class_inf - * @param physical_tile - * @param logical_block - * @param class_inf - * @param physical_class_num */ static void do_reachability_analysis(t_physical_tile_type* physical_tile, t_logical_block_type* logical_block, t_class* class_inf, int physical_class_num); -/** - * @brief Sets VPR parameters and defaults. - * - * Does not do any error checking as this should have been done by - * the various input checkers - */ void SetupVPR(const t_options* options, const bool timingenabled, const bool readArchFile, @@ -306,7 +334,7 @@ void SetupVPR(const t_options* options, ShowSetup(*vpr_setup); - /* init global variables */ + // init global variables vtr::out_file_prefix = options->out_file_prefix; { @@ -348,7 +376,7 @@ void SetupVPR(const t_options* options, } static void setup_timing(const t_options& Options, const bool TimingEnabled, t_timing_inf* Timing) { - /* Don't do anything if they don't want timing */ + // Don't do anything if they don't want timing if (!TimingEnabled) { Timing->timing_analysis_enabled = false; return; @@ -358,10 +386,6 @@ static void setup_timing(const t_options& Options, const bool TimingEnabled, t_t Timing->SDCFile = Options.SDCFile; } -/** - * @brief This loads up VPR's arch_switch_inf data by combining the switches - * from the arch file with the special switches that VPR needs. 
- */ static void setup_switches(const t_arch& Arch, t_det_routing_arch& RoutingArch, const std::vector& arch_switches) { @@ -372,10 +396,10 @@ static void setup_switches(const t_arch& Arch, find_ipin_cblock_switch_index(Arch, RoutingArch.wire_to_arch_ipin_switch, RoutingArch.wire_to_arch_ipin_switch_between_dice); - /* Depends on device_ctx.num_arch_switches */ + // Depends on device_ctx.num_arch_switches RoutingArch.delayless_switch = num_arch_switches++; - /* Alloc the list now that we know the final num_arch_switches value */ + // Alloc the list now that we know the final num_arch_switches value device_ctx.arch_switch_inf.resize(num_arch_switches); for (int iswitch = 0; iswitch < switches_to_copy; iswitch++) { device_ctx.arch_switch_inf[iswitch] = arch_switches[iswitch]; @@ -384,7 +408,7 @@ static void setup_switches(const t_arch& Arch, device_ctx.all_sw_inf[iswitch] = arch_switches[iswitch]; } - /* Delayless switch for connecting sinks and sources with their pins. */ + // Delayless switch for connecting sinks and sources with their pins. device_ctx.arch_switch_inf[RoutingArch.delayless_switch].set_type(SwitchType::MUX); device_ctx.arch_switch_inf[RoutingArch.delayless_switch].name = std::string(VPR_DELAYLESS_SWITCH_NAME); device_ctx.arch_switch_inf[RoutingArch.delayless_switch].R = 0.; @@ -404,21 +428,15 @@ static void setup_switches(const t_arch& Arch, device_ctx.delayless_switch_idx = RoutingArch.delayless_switch; - //Warn about non-zero Cout values for the ipin switch, since these values have no effect. - //VPR do not model the R/C's of block internal routing connection. - // - //Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO) + // Warn about non-zero Cout values for the ipin switch, since these values have no effect. 
+ // VPR does not model the R/C's of block internal routing connection + // Note that we don't warn about the R value as it may be used to size the buffer (if buf_size_type is AUTO) if (device_ctx.arch_switch_inf[RoutingArch.wire_to_arch_ipin_switch].Cout != 0.) { VTR_LOG_WARN("Non-zero switch output capacitance (%g) has no effect when switch '%s' is used for connection block inputs\n", device_ctx.arch_switch_inf[RoutingArch.wire_to_arch_ipin_switch].Cout, Arch.ipin_cblock_switch_name[0].c_str()); } } -/** - * @brief Sets up routing structures. - * - * Since checks are already done, this just copies values across - */ static void setup_routing_arch(const t_arch& Arch, t_det_routing_arch& RoutingArch) { RoutingArch.switch_block_type = Arch.sb_type; @@ -432,10 +450,10 @@ static void setup_routing_arch(const t_arch& Arch, RoutingArch.directionality = Arch.Segments[0].directionality; } - /* copy over the switch block information */ + // copy over the switch block information RoutingArch.switchblocks = Arch.switchblocks; - /* Copy the tileable routing setting */ + // Copy the tileable routing setting RoutingArch.tileable = Arch.tileable; RoutingArch.perimeter_cb = Arch.perimeter_cb; RoutingArch.shrink_boundary = Arch.shrink_boundary; @@ -569,15 +587,8 @@ static void setup_anneal_sched(const t_options& Options, AnnealSched->type = Options.anneal_sched_type; } -/** - * @brief Sets up the t_ap_opts structure based on users inputs and - * on the architecture specified. 
- * - * Error checking, such as checking for conflicting params is assumed - * to be done beforehand - */ -void setup_ap_opts(const t_options& options, - t_ap_opts& apOpts) { +static void setup_ap_opts(const t_options& options, + t_ap_opts& apOpts) { apOpts.analytical_solver_type = options.ap_analytical_solver.value(); apOpts.partial_legalizer_type = options.ap_partial_legalizer.value(); apOpts.full_legalizer_type = options.ap_full_legalizer.value(); @@ -591,13 +602,6 @@ void setup_ap_opts(const t_options& options, apOpts.generate_mass_report = options.ap_generate_mass_report.value(); } -/** - * @brief Sets up the t_packer_opts structure based on users inputs and - * on the architecture specified. - * - * Error checking, such as checking for conflicting params is assumed - * to be done beforehand - */ void setup_packer_opts(const t_options& Options, t_packer_opts* PackerOpts) { PackerOpts->output_file = Options.NetFile; @@ -639,12 +643,6 @@ static void setup_netlist_opts(const t_options& Options, t_netlist_opts& Netlist NetlistOpts.netlist_verbosity = Options.netlist_verbosity; } -/** - * @brief Sets up the s_placer_opts structure based on users input. 
- * - * Error checking, such as checking for conflicting params - * is assumed to be done beforehand - */ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpts) { if (Options.do_placement) { PlacerOpts->doPlacement = e_stage_action::DO; @@ -677,7 +675,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio; PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold; - /* Depends on PlacerOpts->place_algorithm */ + // Depends on PlacerOpts->place_algorithm PlacerOpts->delay_offset = Options.place_delay_offset; PlacerOpts->delay_ramp_delta_threshold = Options.place_delay_ramp_delta_threshold; PlacerOpts->delay_ramp_slope = Options.place_delay_ramp_slope; @@ -686,7 +684,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->delay_model_type = Options.place_delay_model; PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer; - PlacerOpts->place_freq = PLACE_ONCE; /* DEFAULT */ + PlacerOpts->place_freq = PLACE_ONCE; // DEFAULT PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file; @@ -778,9 +776,7 @@ static void setup_power_opts(const t_options& Options, t_power_opts* power_opts, } } -/* - * Go through all the NoC options supplied by the user and store them internally. - */ + static void setup_noc_opts(const t_options& Options, t_noc_opts* NocOpts) { // assign the noc specific options from the command line NocOpts->noc = Options.noc; diff --git a/vpr/src/base/setup_vpr.h b/vpr/src/base/setup_vpr.h index f72bb231bd3..7364b8bb05d 100644 --- a/vpr/src/base/setup_vpr.h +++ b/vpr/src/base/setup_vpr.h @@ -5,6 +5,12 @@ #include "physical_types.h" #include "vpr_types.h" +/** + * @brief Sets VPR parameters and defaults. 
+ * + * Does not do any error checking as this should have been done by + * the various input checkers + */ void SetupVPR(const t_options* Options, const bool TimingEnabled, const bool readArchFile, From be2cc281f110a057469fbdde003948a694ee603e Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Wed, 16 Jul 2025 11:20:45 -0400 Subject: [PATCH 55/66] make format --- vpr/src/base/setup_vpr.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index b792ac4bf12..0f973168cde 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -776,7 +776,6 @@ static void setup_power_opts(const t_options& Options, t_power_opts* power_opts, } } - static void setup_noc_opts(const t_options& Options, t_noc_opts* NocOpts) { // assign the noc specific options from the command line NocOpts->noc = Options.noc; From 58413b7ee737dd30bd12174f6877b1b9435be08a Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 11:09:28 -0400 Subject: [PATCH 56/66] call clean_floorplanning_context_post_place() outside try_place() --- vpr/src/base/place_and_route.cpp | 2 ++ vpr/src/base/setup_vpr.cpp | 2 +- vpr/src/base/vpr_api.cpp | 12 +++++++----- vpr/src/place/place.cpp | 26 +++++++++++--------------- vpr/src/place/place_constraints.cpp | 2 +- 5 files changed, 22 insertions(+), 22 deletions(-) diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index 138d411539d..b7145f3ef09 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -360,6 +360,8 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, } } + g_vpr_ctx.mutable_floorplanning().clean_floorplanning_context_post_place(); + // End binary search verification. // Restore the best placement (if necessary), the best routing, and the // best channel widths for final drawing and statistics output. 
diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index 4f2cc967bca..4148bdeac4b 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -683,7 +683,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->delay_model_type = Options.place_delay_model; PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer; - PlacerOpts->place_freq = PLACE_ONCE; /* DEFAULT */ + PlacerOpts->place_freq = PLACE_ALWAYS; /* DEFAULT */ PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index dcd0d2394c9..7ba9580ef9d 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -726,7 +726,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { auto& cluster_ctx = g_vpr_ctx.mutable_clustering(); const AtomContext& atom_ctx = g_vpr_ctx.atom(); - /* Ensure we have a clean start with void net remapping information */ + // Ensure we have a clean start with void net remapping information cluster_ctx.post_routing_clb_pin_nets.clear(); cluster_ctx.pre_routing_net_pin_mapping.clear(); @@ -735,7 +735,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { vpr_setup.FileNameOpts.verify_file_digests, vpr_setup.PackerOpts.pack_verbosity); - /* Load the mapping between clusters and their atoms */ + // Load the mapping between clusters and their atoms init_clb_atoms_lookup(cluster_ctx.atoms_lookup, atom_ctx, cluster_ctx.clb_nlist); process_constant_nets(g_vpr_ctx.mutable_atom().mutable_netlist(), @@ -749,14 +749,14 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { report_packing_pin_usage(ofs, g_vpr_ctx); } - // Ater the clustered netlist has been loaded, update the floorplanning + // After the clustered netlist has been loaded, update the floorplanning // constraints with the new information. 
g_vpr_ctx.mutable_floorplanning().update_floorplanning_context_post_pack(); /* Sanity check the resulting netlist */ check_netlist(vpr_setup.PackerOpts.pack_verbosity); - // Independently verify the clusterings to ensure the clustering can be + // Independently verify the clustering to ensure the clustering can be // used for the rest of the VPR flow. unsigned num_errors = verify_clustering(g_vpr_ctx); if (num_errors == 0) { @@ -768,7 +768,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { num_errors); } - /* Output the netlist stats to console and optionally to file. */ + // Output the netlist stats to console and optionally to file. writeClusteredNetlistStats(vpr_setup.FileNameOpts.write_block_usage); // print the total number of used physical blocks for each @@ -887,6 +887,8 @@ void vpr_place(const Netlist<>& net_list, g_vpr_ctx.atom().flat_placement_info(), is_flat); + g_vpr_ctx.mutable_floorplanning().clean_floorplanning_context_post_place(); + auto& filename_opts = vpr_setup.FileNameOpts; auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& block_locs = g_vpr_ctx.placement().block_locs(); diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index c5d46b5af3f..6f2ebb89e0f 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -38,10 +38,9 @@ void try_place(const Netlist<>& net_list, const FlatPlacementInfo& flat_placement_info, bool is_flat) { - /* Currently, the functions that require is_flat as their parameter and are called during placement should - * receive is_flat as false. For example, if the RR graph of router lookahead is built here, it should be as - * if is_flat is false, even if is_flat is set to true from the command line. - */ + // Currently, the functions that require is_flat as their parameter and are called during placement should + // receive is_flat as false. 
For example, if the RR graph of router lookahead is built here, it should be as + // if is_flat is false, even if is_flat is set to true from the command line VTR_ASSERT(!is_flat); const auto& device_ctx = g_vpr_ctx.device(); const auto& cluster_ctx = g_vpr_ctx.clustering(); @@ -74,9 +73,9 @@ void try_place(const Netlist<>& net_list, normalize_noc_cost_weighting_factor(const_cast(noc_opts)); } - /* Placement delay model is independent of the placement and can be shared across - * multiple placers if we are performing parallel annealing. - * So, it is created and initialized once. */ + // Placement delay model is independent of the placement and can be shared across + // multiple placers if we are performing parallel annealing. + // So, it is created and initialized once. */ std::shared_ptr place_delay_model; if (placer_opts.place_algorithm.is_timing_driven()) { @@ -101,9 +100,8 @@ void try_place(const Netlist<>& net_list, */ mutable_placement.lock_loc_vars(); - /* Start measuring placement time. The measured execution time will be printed - * when this object goes out of scope at the end of this function. - */ + // Start measuring placement time. The measured execution time will be printed + // when this object goes out of scope at the end of this function. vtr::ScopedStartFinishTimer placement_timer("Placement"); // Enables fast look-up pb graph pins from block pin indices @@ -117,17 +115,15 @@ void try_place(const Netlist<>& net_list, placer.place(); - /* The placer object has its own copy of block locations and doesn't update - * the global context directly. We need to copy its internal data structures - * to the global placement context before it goes out of scope. - */ + // The placer object has its own copy of block locations and doesn't update + // the global context directly. We need to copy its internal data structures + // to the global placement context before it goes out of scope. 
placer.update_global_state(); // Clean the variables in the placement context. This will deallocate memory // used by variables which were allocated in the placement context and are // never used outside of placement. mutable_placement.clean_placement_context_post_place(); - mutable_floorplanning.clean_floorplanning_context_post_place(); } #ifdef VERBOSE diff --git a/vpr/src/place/place_constraints.cpp b/vpr/src/place/place_constraints.cpp index ef867ce5b1a..fdad4813cb2 100644 --- a/vpr/src/place/place_constraints.cpp +++ b/vpr/src/place/place_constraints.cpp @@ -209,7 +209,7 @@ void load_cluster_constraints() { floorplanning_ctx.cluster_constraints.resize(cluster_ctx.clb_nlist.blocks().size()); - for (auto cluster_id : cluster_ctx.clb_nlist.blocks()) { + for (ClusterBlockId cluster_id : cluster_ctx.clb_nlist.blocks()) { const std::unordered_set& atoms = cluster_ctx.atoms_lookup[cluster_id]; PartitionRegion empty_pr; floorplanning_ctx.cluster_constraints[cluster_id] = empty_pr; From d8ebd1818c84eb148a14c0ada7cac3251cb4a686 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 13:48:45 -0400 Subject: [PATCH 57/66] snake case, comment style, and typos --- vpr/src/base/read_options.cpp | 46 +++++++++++++-------------- vpr/src/base/read_options.h | 51 +++++++++++++++--------------- vpr/src/base/setup_vpr.cpp | 20 ++++++------ vpr/src/base/vpr_api.cpp | 3 +- vpr/src/pack/verify_clustering.cpp | 2 +- vpr/src/place/place.cpp | 29 ++++++++--------- 6 files changed, 75 insertions(+), 76 deletions(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 57c449b6d26..401f6f09fc4 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2177,7 +2177,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& place_grp = parser.add_argument_group("placement options"); - place_grp.add_argument(args.Seed, "--seed") + place_grp.add_argument(args.seed, "--seed") .help("Placement 
random number generator seed") .default_value("1") .show_in(argparse::ShowIn::HELP_ONLY); @@ -2195,7 +2195,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("astar") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceInnerNum, "--inner_num") + place_grp.add_argument(args.place_inner_num, "--inner_num") .help("Controls number of moves per temperature: inner_num * num_blocks ^ (4/3)") .default_value("0.5") .show_in(argparse::ShowIn::HELP_ONLY); @@ -2226,17 +2226,17 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("1.0") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceInitT, "--init_t") + place_grp.add_argument(args.place_init_t, "--init_t") .help("Initial temperature for manual annealing schedule") .default_value("100.0") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceExitT, "--exit_t") + place_grp.add_argument(args.place_exit_t, "--exit_t") .help("Temperature at which annealing which terminate for manual annealing schedule") .default_value("0.01") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceAlphaT, "--alpha_t") + place_grp.add_argument(args.place_alpha_t, "--alpha_t") .help( "Temperature scaling factor for manual annealing schedule." " Old temperature is multiplied by alpha_t") @@ -2259,7 +2259,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceAlgorithm, "--place_algorithm") + place_grp.add_argument(args.place_algorithm, "--place_algorithm") .help( "Controls which placement algorithm is used. Valid options:\n" " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. 
Turns off timing analysis if specified.\n" @@ -2269,7 +2269,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceQuenchAlgorithm, "--place_quench_algorithm") + place_grp.add_argument(args.place_quench_algorithm, "--place_quench_algorithm") .help( "Controls which placement algorithm is used during placement quench.\n" "If specified, it overrides the option --place_algorithm during placement quench.\n" @@ -2281,7 +2281,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width") + place_grp.add_argument(args.place_chan_width, "--place_chan_width") .help( "Sets the assumed channel width during placement. " "If --place_chan_width is unspecified, but --route_chan_width is specified the " @@ -2483,14 +2483,14 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& place_timing_grp = parser.add_argument_group("timing-driven placement options"); - place_timing_grp.add_argument(args.PlaceTimingTradeoff, "--timing_tradeoff") + place_timing_grp.add_argument(args.place_timing_tradeoff, "--timing_tradeoff") .help( "Trade-off control between delay and wirelength during placement." 
" 0.0 focuses completely on wirelength, 1.0 completely on timing") .default_value("0.5") .show_in(argparse::ShowIn::HELP_ONLY); - place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter") + place_timing_grp.add_argument(args.recompute_crit_iter, "--recompute_crit_iter") .help("Controls how many temperature updates occur between timing analysis during placement") .default_value("1") .show_in(argparse::ShowIn::HELP_ONLY); @@ -3449,11 +3449,11 @@ void set_conditional_defaults(t_options& args) { */ //Which placement algorithm to use? - if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) { + if (args.place_algorithm.provenance() != Provenance::SPECIFIED) { if (args.timing_analysis) { - args.PlaceAlgorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED); + args.place_algorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED); } else { - args.PlaceAlgorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED); + args.place_algorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED); } } @@ -3467,7 +3467,7 @@ void set_conditional_defaults(t_options& args) { // Check for correct options combinations // If you are running WLdriven placement, the RL reward function should be // either basic or nonPenalizing basic - if (args.RL_agent_placement && (args.PlaceAlgorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) { + if (args.RL_agent_placement && (args.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) { if (args.place_reward_fun.value() != "basic" && args.place_reward_fun.value() != "nonPenalizing_basic") { VTR_LOG_WARN( "To use RLPlace for WLdriven placements, the reward function should be basic or nonPenalizing_basic.\n" @@ -3478,18 +3478,18 @@ void set_conditional_defaults(t_options& args) { } //Which placement algorithm to use during placement quench? 
- if (args.PlaceQuenchAlgorithm.provenance() != Provenance::SPECIFIED) { - args.PlaceQuenchAlgorithm.set(args.PlaceAlgorithm, Provenance::INFERRED); + if (args.place_quench_algorithm.provenance() != Provenance::SPECIFIED) { + args.place_quench_algorithm.set(args.place_algorithm, Provenance::INFERRED); } //Place chan width follows Route chan width if unspecified - if (args.PlaceChanWidth.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) { - args.PlaceChanWidth.set(args.RouteChanWidth.value(), Provenance::INFERRED); + if (args.place_chan_width.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) { + args.place_chan_width.set(args.RouteChanWidth.value(), Provenance::INFERRED); } //Do we calculate timing info during placement? - if (args.ShowPlaceTiming.provenance() != Provenance::SPECIFIED) { - args.ShowPlaceTiming.set(args.timing_analysis, Provenance::INFERRED); + if (args.show_place_timing.provenance() != Provenance::SPECIFIED) { + args.show_place_timing.set(args.timing_analysis, Provenance::INFERRED); } //Slave quench recompute divider of inner loop recompute divider unless specified @@ -3498,9 +3498,9 @@ void set_conditional_defaults(t_options& args) { } //Which schedule? 
- if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule - || args.PlaceExitT.provenance() == Provenance::SPECIFIED - || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) { + if (args.place_init_t.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule + || args.place_exit_t.provenance() == Provenance::SPECIFIED + || args.place_alpha_t.provenance() == Provenance::SPECIFIED) { args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED); } else { args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index f846867af77..ece3e391629 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -9,7 +9,7 @@ #include "argparse.hpp" struct t_options { - /* File names */ + // File names argparse::ArgValue ArchFile; argparse::ArgValue CircuitName; argparse::ArgValue NetFile; @@ -49,7 +49,7 @@ struct t_options { argparse::ArgValue write_block_usage; - /* Stage Options */ + // Stage Options argparse::ArgValue do_packing; argparse::ArgValue do_legalize; argparse::ArgValue do_placement; @@ -58,13 +58,13 @@ struct t_options { argparse::ArgValue do_analysis; argparse::ArgValue do_power; - /* Graphics Options */ + // Graphics Options argparse::ArgValue show_graphics; ///eractive graphics? 
argparse::ArgValue GraphPause; argparse::ArgValue save_graphics; argparse::ArgValue graphics_commands; - /* General options */ + // General options argparse::ArgValue show_help; argparse::ArgValue show_version; argparse::ArgValue show_arch_resources; @@ -86,11 +86,11 @@ struct t_options { argparse::ArgValue allow_dangling_combinational_nodes; argparse::ArgValue terminate_if_timing_fails; - /* Server options */ + // Server options argparse::ArgValue is_server_mode_enabled; argparse::ArgValue server_port_num; - /* Atom netlist options */ + // Atom netlist options argparse::ArgValue absorb_buffer_luts; argparse::ArgValue const_gen_inference; argparse::ArgValue sweep_dangling_primary_ios; @@ -99,7 +99,7 @@ struct t_options { argparse::ArgValue sweep_constant_primary_outputs; argparse::ArgValue netlist_verbosity; - /* Analytical Placement options */ + // Analytical Placement options argparse::ArgValue ap_analytical_solver; argparse::ArgValue ap_partial_legalizer; argparse::ArgValue ap_full_legalizer; @@ -111,7 +111,7 @@ struct t_options { argparse::ArgValue ap_high_fanout_threshold; argparse::ArgValue ap_generate_mass_report; - /* Clustering options */ + // Clustering options argparse::ArgValue connection_driven_clustering; argparse::ArgValue allow_unrelated_clustering; argparse::ArgValue timing_gain_weight; @@ -126,19 +126,20 @@ struct t_options { argparse::ArgValue pack_feasible_block_array_size; argparse::ArgValue> pack_high_fanout_threshold; argparse::ArgValue pack_verbosity; - /* Placement options */ - argparse::ArgValue Seed; - argparse::ArgValue ShowPlaceTiming; - argparse::ArgValue PlaceInnerNum; + + // Placement options + argparse::ArgValue seed; + argparse::ArgValue show_place_timing; + argparse::ArgValue place_inner_num; argparse::ArgValue place_auto_init_t_scale; - argparse::ArgValue PlaceInitT; - argparse::ArgValue PlaceExitT; - argparse::ArgValue PlaceAlphaT; + argparse::ArgValue place_init_t; + argparse::ArgValue place_exit_t; + argparse::ArgValue 
place_alpha_t; argparse::ArgValue anneal_sched_type; - argparse::ArgValue PlaceAlgorithm; - argparse::ArgValue PlaceQuenchAlgorithm; + argparse::ArgValue place_algorithm; + argparse::ArgValue place_quench_algorithm; argparse::ArgValue pad_loc_type; - argparse::ArgValue PlaceChanWidth; + argparse::ArgValue place_chan_width; argparse::ArgValue place_rlim_escape_fraction; argparse::ArgValue place_move_stats_file; argparse::ArgValue placement_saves_per_temperature; @@ -167,7 +168,7 @@ struct t_options { argparse::ArgValue placer_debug_block; argparse::ArgValue placer_debug_net; - /*NoC Options*/ + // NoC Options argparse::ArgValue noc; argparse::ArgValue noc_flows_file; argparse::ArgValue noc_routing_algorithm; @@ -185,9 +186,9 @@ struct t_options { argparse::ArgValue noc_sat_routing_log_search_progress; argparse::ArgValue noc_placement_file_name; - /* Timing-driven placement options only */ - argparse::ArgValue PlaceTimingTradeoff; - argparse::ArgValue RecomputeCritIter; + // Timing-driven placement options only + argparse::ArgValue place_timing_tradeoff; + argparse::ArgValue recompute_crit_iter; argparse::ArgValue inner_loop_recompute_divider; argparse::ArgValue quench_recompute_divider; argparse::ArgValue place_exp_first; @@ -202,7 +203,7 @@ struct t_options { argparse::ArgValue place_delay_model_reducer; argparse::ArgValue allowed_tiles_for_delay_model; - /* Router Options */ + // Router Options argparse::ArgValue check_rr_graph; argparse::ArgValue max_router_iterations; argparse::ArgValue first_iter_pres_fac; @@ -232,7 +233,7 @@ struct t_options { argparse::ArgValue route_verbosity; argparse::ArgValue custom_3d_sb_fanin_fanout; - /* Timing-driven router options only */ + // Timing-driven router options only argparse::ArgValue astar_fac; argparse::ArgValue astar_offset; argparse::ArgValue router_profiler_astar_fac; @@ -267,7 +268,7 @@ struct t_options { argparse::ArgValue router_initial_timing; argparse::ArgValue router_heap; - /* Analysis options */ + // Analysis 
options argparse::ArgValue full_stats; argparse::ArgValue Generate_Post_Synthesis_Netlist; argparse::ArgValue Generate_Post_Implementation_Merged_Netlist; diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index 4148bdeac4b..8f3b6f0b4c8 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -542,17 +542,17 @@ static void setup_router_opts(const t_options& Options, t_router_opts* RouterOpt static void setup_anneal_sched(const t_options& Options, t_annealing_sched* AnnealSched) { - AnnealSched->alpha_t = Options.PlaceAlphaT; + AnnealSched->alpha_t = Options.place_alpha_t; if (AnnealSched->alpha_t >= 1 || AnnealSched->alpha_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_t must be between 0 and 1 exclusive.\n"); } - AnnealSched->exit_t = Options.PlaceExitT; + AnnealSched->exit_t = Options.place_exit_t; if (AnnealSched->exit_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "exit_t must be greater than 0.\n"); } - AnnealSched->init_t = Options.PlaceInitT; + AnnealSched->init_t = Options.place_init_t; if (AnnealSched->init_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater than 0.\n"); } @@ -561,7 +561,7 @@ static void setup_anneal_sched(const t_options& Options, VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater or equal to than exit_t.\n"); } - AnnealSched->inner_num = Options.PlaceInnerNum; + AnnealSched->inner_num = Options.place_inner_num; if (AnnealSched->inner_num <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n"); } @@ -657,8 +657,8 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->td_place_exp_last = Options.place_exp_last; - PlacerOpts->place_algorithm = Options.PlaceAlgorithm; - PlacerOpts->place_quench_algorithm = Options.PlaceQuenchAlgorithm; + PlacerOpts->place_algorithm = Options.place_algorithm; + PlacerOpts->place_quench_algorithm = Options.place_quench_algorithm; PlacerOpts->constraints_file = Options.constraints_file; @@ 
-668,11 +668,11 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->pad_loc_type = Options.pad_loc_type; - PlacerOpts->place_chan_width = Options.PlaceChanWidth; + PlacerOpts->place_chan_width = Options.place_chan_width; - PlacerOpts->recompute_crit_iter = Options.RecomputeCritIter; + PlacerOpts->recompute_crit_iter = Options.recompute_crit_iter; - PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff; + PlacerOpts->timing_tradeoff = Options.place_timing_tradeoff; /* Depends on PlacerOpts->place_algorithm */ PlacerOpts->delay_offset = Options.place_delay_offset; @@ -721,7 +721,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->floorplan_num_vertical_partitions = Options.floorplan_num_vertical_partitions; PlacerOpts->place_quench_only = Options.place_quench_only; - PlacerOpts->seed = Options.Seed; + PlacerOpts->seed = Options.seed; PlacerOpts->placer_debug_block = Options.placer_debug_block; PlacerOpts->placer_debug_net = Options.placer_debug_net; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index 7ba9580ef9d..d3a8ec901c3 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -753,7 +753,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { // constraints with the new information. 
g_vpr_ctx.mutable_floorplanning().update_floorplanning_context_post_pack(); - /* Sanity check the resulting netlist */ + // Sanity check the resulting netlist check_netlist(vpr_setup.PackerOpts.pack_verbosity); // Independently verify the clustering to ensure the clustering can be @@ -887,7 +887,6 @@ void vpr_place(const Netlist<>& net_list, g_vpr_ctx.atom().flat_placement_info(), is_flat); - g_vpr_ctx.mutable_floorplanning().clean_floorplanning_context_post_place(); auto& filename_opts = vpr_setup.FileNameOpts; auto& cluster_ctx = g_vpr_ctx.clustering(); diff --git a/vpr/src/pack/verify_clustering.cpp b/vpr/src/pack/verify_clustering.cpp index ec08e10a40b..93f925ef68b 100644 --- a/vpr/src/pack/verify_clustering.cpp +++ b/vpr/src/pack/verify_clustering.cpp @@ -406,7 +406,7 @@ unsigned verify_clustering(const ClusteredNetlist& clb_nlist, // Return here since this error can cause serious issues below. return num_errors; } - // Check conssitency between which clusters the atom's think thet are in and + // Check consistency between which clusters the atom's think thet are in and // which atoms the clusters think they have. num_errors += check_clustering_atom_consistency(clb_nlist, atom_nlist, diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 6f2ebb89e0f..467dbed31f9 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -59,16 +59,16 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? "Cube" : "Per-layer")); VTR_LOG("\n"); - /* To make sure the importance of NoC-related cost terms compared to - * BB and timing cost is determine only through NoC placement weighting factor, - * we normalize NoC-related cost weighting factors so that they add up to 1. 
- * With this normalization, NoC-related cost weighting factors only determine - * the relative importance of NoC cost terms with respect to each other, while - * the importance of total NoC cost to conventional placement cost is determined - * by NoC placement weighting factor. - * FIXME: This should not be modifying the NoC Opts here, this normalization - * should occur when these Opts are loaded in. - */ + + // To make sure the importance of NoC-related cost terms compared to + // BB and timing cost is determine only through NoC placement weighting factor, + // we normalize NoC-related cost weighting factors so that they add up to 1. + // With this normalization, NoC-related cost weighting factors only determine + // the relative importance of NoC cost terms with respect to each other, while + // the importance of total NoC cost to conventional placement cost is determined + // by NoC placement weighting factor. + // FIXME: This should not be modifying the NoC Opts here, this normalization + // should occur when these Opts are loaded in. if (noc_opts.noc) { normalize_noc_cost_weighting_factor(const_cast(noc_opts)); } @@ -94,10 +94,9 @@ void try_place(const Netlist<>& net_list, } } - /* Make the global instance of BlkLocRegistry inaccessible through the getter methods of the - * placement context. This is done to make sure that the placement stage only accesses its - * own local instances of BlkLocRegistry. - */ + // Make the global instance of BlkLocRegistry inaccessible through the getter methods of the + // placement context. This is done to make sure that the placement stage only accesses its + // own local instances of BlkLocRegistry. mutable_placement.lock_loc_vars(); // Start measuring placement time. The measured execution time will be printed @@ -150,7 +149,7 @@ static void update_screen_debug(); //Performs a major (i.e. interactive) placement screen update. 
//This function with no arguments is useful for calling from a debugger to -//look at the intermediate implemetnation state. +//look at the intermediate implementation state. static void update_screen_debug() { update_screen(ScreenUpdatePriority::MAJOR, "DEBUG", PLACEMENT, nullptr); } From b8604c6f058aaabab114bea692576f7c1315f59a Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 13:49:16 -0400 Subject: [PATCH 58/66] call pdate_floorplanning_context_post_pack() at the start of placement if cluster_constraints.empty() --- vpr/src/base/place_and_route.cpp | 2 -- vpr/src/place/place.cpp | 5 +++++ 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index b7145f3ef09..138d411539d 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -360,8 +360,6 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, } } - g_vpr_ctx.mutable_floorplanning().clean_floorplanning_context_post_place(); - // End binary search verification. // Restore the best placement (if necessary), the best routing, and the // best channel widths for final drawing and statistics output. diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 467dbed31f9..6133c89fff1 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -51,6 +51,10 @@ void try_place(const Netlist<>& net_list, // Initialize the variables in the placement context. mutable_placement.init_placement_context(placer_opts, directs); + if (mutable_floorplanning.cluster_constraints.empty()) { + mutable_floorplanning.update_floorplanning_context_post_pack(); + } + // Update the floorplanning constraints with the macro information from the // placement context. 
mutable_floorplanning.update_floorplanning_context_pre_place(*mutable_placement.place_macros); @@ -123,6 +127,7 @@ void try_place(const Netlist<>& net_list, // used by variables which were allocated in the placement context and are // never used outside of placement. mutable_placement.clean_placement_context_post_place(); + mutable_floorplanning.clean_floorplanning_context_post_place(); } #ifdef VERBOSE From 88b8c1030f29c3e62439f4293c2439cbec47f991 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 15:21:46 -0400 Subject: [PATCH 59/66] enum class e_place_freq --- vpr/src/base/ShowSetup.cpp | 15 +++++++-------- vpr/src/base/place_and_route.cpp | 8 ++++---- vpr/src/base/setup_vpr.cpp | 2 +- vpr/src/base/vpr_types.h | 16 +++++++++------- 4 files changed, 21 insertions(+), 20 deletions(-) diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 2e8d36e5f2c..0a10a017772 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -495,20 +495,19 @@ static void ShowRouterOpts(const t_router_opts& RouterOpts) { static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) { VTR_LOG("PlacerOpts.place_freq: "); switch (PlacerOpts.place_freq) { - case PLACE_ONCE: - VTR_LOG("PLACE_ONCE\n"); + case e_place_freq::ONCE: + VTR_LOG("ONCE\n"); break; - case PLACE_ALWAYS: - VTR_LOG("PLACE_ALWAYS\n"); + case e_place_freq::ALWAYS: + VTR_LOG("ALWAYS\n"); break; - case PLACE_NEVER: - VTR_LOG("PLACE_NEVER\n"); + case e_place_freq::NEVER: + VTR_LOG("NEVER\n"); break; default: VTR_LOG_ERROR("Unknown Place Freq\n"); } - if ((PLACE_ONCE == PlacerOpts.place_freq) - || (PLACE_ALWAYS == PlacerOpts.place_freq)) { + if (PlacerOpts.place_freq == e_place_freq::ONCE || PlacerOpts.place_freq == e_place_freq::ALWAYS) { VTR_LOG("PlacerOpts.place_algorithm: "); switch (PlacerOpts.place_algorithm.get()) { case e_place_algorithm::BOUNDING_BOX_PLACE: diff --git a/vpr/src/base/place_and_route.cpp b/vpr/src/base/place_and_route.cpp index 
138d411539d..6381eb39dc3 100644 --- a/vpr/src/base/place_and_route.cpp +++ b/vpr/src/base/place_and_route.cpp @@ -167,7 +167,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, break; } - if (placer_opts.place_freq == PLACE_ALWAYS) { + if (placer_opts.place_freq == e_place_freq::ALWAYS) { placer_opts.place_chan_width = current; try_place(placement_net_list, placer_opts, @@ -312,7 +312,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, fflush(stdout); if (current < 1) break; - if (placer_opts.place_freq == PLACE_ALWAYS) { + if (placer_opts.place_freq == e_place_freq::ALWAYS) { placer_opts.place_chan_width = current; try_place(placement_net_list, placer_opts, router_opts, analysis_opts, noc_opts, arch->Chans, det_routing_arch, segment_inf, @@ -341,7 +341,7 @@ int binary_search_place_and_route(const Netlist<>& placement_net_list, route_ctx.clb_opins_used_locally, saved_clb_opins_used_locally); - if (placer_opts.place_freq == PLACE_ALWAYS) { + if (placer_opts.place_freq == e_place_freq::ALWAYS) { auto& cluster_ctx = g_vpr_ctx.clustering(); // Cluster-based net_list is used for placement std::string placement_id = print_place(filename_opts.NetFile.c_str(), cluster_ctx.clb_nlist.netlist_id().c_str(), @@ -417,7 +417,7 @@ t_chan_width setup_chan_width(const t_router_opts& router_opts, if (router_opts.fixed_channel_width == NO_FIXED_CHANNEL_WIDTH) { auto& device_ctx = g_vpr_ctx.device(); - auto type = find_most_common_tile_type(device_ctx.grid); + t_physical_tile_type_ptr type = find_most_common_tile_type(device_ctx.grid); width_fac = 4 * type->num_pins; // this is 2x the value that binary search starts diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index 8f3b6f0b4c8..09728e969bb 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -683,7 +683,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->delay_model_type = 
Options.place_delay_model; PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer; - PlacerOpts->place_freq = PLACE_ALWAYS; /* DEFAULT */ + PlacerOpts->place_freq = e_place_freq::ALWAYS; /* DEFAULT */ PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file; diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index 41da9a6d085..726b8c3e02d 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -375,23 +375,25 @@ constexpr int NUM_PL_MOVE_TYPES = 7; constexpr int NUM_PL_NONTIMING_MOVE_TYPES = 3; /* Timing data structures end */ + +// Annealing schedule enum class e_sched_type { AUTO_SCHED, USER_SCHED }; -/* Annealing schedule */ + +// What's on screen? enum pic_type { NO_PICTURE, PLACEMENT, ROUTING }; -/* What's on screen? */ -enum pfreq { - PLACE_NEVER, - PLACE_ONCE, - PLACE_ALWAYS +enum class e_place_freq { + NEVER, + ONCE, + ALWAYS }; ///@brief Power data for t_netlist structure @@ -1032,7 +1034,7 @@ struct t_placer_opts { std::string constraints_file; std::string write_initial_place_file; std::string read_initial_place_file; - enum pfreq place_freq; + e_place_freq place_freq; int recompute_crit_iter; int inner_loop_recompute_divider; int quench_recompute_divider; From eff6743a986bbe1462720d7627e7ce7e770658b0 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 15:35:32 -0400 Subject: [PATCH 60/66] remove e_place_freq::NEVER --- vpr/src/base/ShowSetup.cpp | 3 --- vpr/src/base/vpr_types.h | 1 - 2 files changed, 4 deletions(-) diff --git a/vpr/src/base/ShowSetup.cpp b/vpr/src/base/ShowSetup.cpp index 0a10a017772..858bd3198ce 100644 --- a/vpr/src/base/ShowSetup.cpp +++ b/vpr/src/base/ShowSetup.cpp @@ -501,9 +501,6 @@ static void ShowPlacerOpts(const t_placer_opts& PlacerOpts) { case e_place_freq::ALWAYS: VTR_LOG("ALWAYS\n"); break; - case e_place_freq::NEVER: - VTR_LOG("NEVER\n"); - break; default: VTR_LOG_ERROR("Unknown Place Freq\n"); } diff --git a/vpr/src/base/vpr_types.h 
b/vpr/src/base/vpr_types.h index 726b8c3e02d..e9c49f4f281 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -391,7 +391,6 @@ enum pic_type { }; enum class e_place_freq { - NEVER, ONCE, ALWAYS }; From 4192107a41b750c72af372403f195427a6ee4ecb Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 15:36:24 -0400 Subject: [PATCH 61/66] add --place_frequency to read_options --- vpr/src/base/read_options.cpp | 37 +++++++++++++++++++++++++++++++++++ vpr/src/base/read_options.h | 1 + vpr/src/base/setup_vpr.cpp | 2 +- 3 files changed, 39 insertions(+), 1 deletion(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 401f6f09fc4..eb1daa4063d 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -618,6 +618,37 @@ struct ParsePlaceBoundingBox { } }; +struct ParsePlacementFreq { + ConvertedValue from_str(const std::string& str) { + ConvertedValue conv_value; + if (str == "once") { + conv_value.set_value(e_place_freq::ONCE); + } else if (str == "always") { + conv_value.set_value(e_place_freq::ALWAYS); + } else { + std::stringstream msg; + msg << "Invalid conversion from '" << str << "' to e_place_freq (expected one of: " << argparse::join(default_choices(), ", ") << ")"; + conv_value.set_error(msg.str()); + } + return conv_value; + } + + ConvertedValue to_str(e_place_freq val) { + ConvertedValue conv_value; + if (val == e_place_freq::ONCE) { + conv_value.set_value("once"); + } else { + VTR_ASSERT(val == e_place_freq::ALWAYS); + conv_value.set_value("always"); + } + return conv_value; + } + + std::vector default_choices() { + return {"once", "always"}; + } +}; + struct ParsePlaceAgentAlgorithm { ConvertedValue from_str(const std::string& str) { ConvertedValue conv_value; @@ -2343,6 +2374,12 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .choices({"auto_bb", "cube_bb", "per_layer_bb"}) .show_in(argparse::ShowIn::HELP_ONLY); + 
place_grp.add_argument(args.place_placement_freq, "--place_frequency") + .help("Run placement every time or only once during channel width search.") + .default_value("once") + .choices({"once, always"}) + .show_in(argparse::ShowIn::HELP_ONLY); + place_grp.add_argument(args.RL_agent_placement, "--RL_agent_placement") .help( "Uses a Reinforcement Learning (RL) agent in choosing the appropriate move type in placement." diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index ece3e391629..d364354ecfc 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -148,6 +148,7 @@ struct t_options { argparse::ArgValue> place_static_move_prob; argparse::ArgValue place_high_fanout_net; argparse::ArgValue place_bounding_box_mode; + argparse::ArgValue place_placement_freq; argparse::ArgValue RL_agent_placement; argparse::ArgValue place_agent_multistate; diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index 09728e969bb..f010181d82e 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -683,7 +683,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->delay_model_type = Options.place_delay_model; PlacerOpts->delay_model_reducer = Options.place_delay_model_reducer; - PlacerOpts->place_freq = e_place_freq::ALWAYS; /* DEFAULT */ + PlacerOpts->place_freq = Options.place_placement_freq; PlacerOpts->post_place_timing_report_file = Options.post_place_timing_report_file; From 7c3210094495b80ef2b02c71d77aa68b941a2ebb Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 16:00:11 -0400 Subject: [PATCH 62/66] update command_line_usage.rst to add --place_frequency --- doc/src/vpr/command_line_usage.rst | 13 ++++++++++++- vpr/src/base/read_options.cpp | 6 +++--- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/doc/src/vpr/command_line_usage.rst b/doc/src/vpr/command_line_usage.rst index b3d482e048a..fe1b46f83b6 100644 --- 
a/doc/src/vpr/command_line_usage.rst +++ b/doc/src/vpr/command_line_usage.rst @@ -907,6 +907,16 @@ If any of init_t, exit_t or alpha_t is specified, the user schedule, with a fixe **Default:** ``auto_bb`` +.. option:: --place_frequency {once | always} + + Specifies how often placement is performed during the minimum channel width search. + + ``once``: Placement is run only once at the beginning of the channel width search. This reduces runtime but may not benefit from congestion-aware optimizations. + + ``always``: Placement is rerun for each channel width trial. This might improve routability at the cost of increased runtime. + + **Default:** ``once`` + .. option:: --place_chan_width Tells VPR how many tracks a channel of relative width 1 is expected to need to complete routing of this circuit. @@ -1869,6 +1879,7 @@ The following options are only valid when the router is in timing-driven mode (t **Default:** ``0.5`` .. option:: --router_initial_acc_cost_chan_congestion_weight + Weight applied to the excess channel utilization (above threshold) when computing the initial accumulated cost (acc_cost)of routing resources. Higher values make the router more sensitive to early congestion. @@ -1907,7 +1918,7 @@ The following options are only valid when the router is in timing-driven mode (t .. option:: --router_first_iter_timing_report - Name of the timing report file to generate after the first routing iteration completes (not generated if unspecfied). + Name of the timing report file to generate after the first routing iteration completes (not generated if unspecified). .. 
option:: --router_debug_net diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index eb1daa4063d..6d013d8b9ed 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2365,9 +2365,9 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio "Specifies the type of bounding box to be used in 3D architectures.\n" "\n" "MODE options:\n" - " auto_bb : Automatically determine the appropriate bounding box based on the connections between layers.\n" - " cube_bb : Use 3D bounding boxes.\n" - " per_layer_bb : Use per-layer bounding boxes.\n" + " auto_bb : Automatically determine the appropriate bounding box based on the connections between layers.\n" + " cube_bb : Use 3D bounding boxes.\n" + " per_layer_bb : Use per-layer bounding boxes.\n" "\n" "Choose one of the available modes to define the behavior of bounding boxes in your 3D architecture. The default mode is 'automatic'.") .default_value("auto_bb") From 8833c79f38e33e1512fa3498faac86992147a503 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 13:48:45 -0400 Subject: [PATCH 63/66] cherry pick commits to run placement for each channel width trial in MRCW search --- vpr/src/base/read_options.cpp | 59 +++++++++++++++--------------- vpr/src/base/read_options.h | 54 ++++++++++++++------------- vpr/src/base/setup_vpr.cpp | 22 +++++------ vpr/src/base/vpr_api.cpp | 4 +- vpr/src/pack/verify_clustering.cpp | 2 +- vpr/src/place/place.cpp | 29 +++++++-------- 6 files changed, 85 insertions(+), 85 deletions(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index cae2d51651f..912455aabfc 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2178,7 +2178,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& place_grp = parser.add_argument_group("placement options"); - place_grp.add_argument(args.Seed, "--seed") + place_grp.add_argument(args.seed, 
"--seed") .help("Placement random number generator seed") .default_value("1") .show_in(argparse::ShowIn::HELP_ONLY); @@ -2196,7 +2196,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("astar") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceInnerNum, "--inner_num") + place_grp.add_argument(args.place_inner_num, "--inner_num") .help("Controls number of moves per temperature: inner_num * num_blocks ^ (4/3)") .default_value("0.5") .show_in(argparse::ShowIn::HELP_ONLY); @@ -2227,17 +2227,17 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("1.0") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceInitT, "--init_t") + place_grp.add_argument(args.place_init_t, "--init_t") .help("Initial temperature for manual annealing schedule") .default_value("100.0") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceExitT, "--exit_t") + place_grp.add_argument(args.place_exit_t, "--exit_t") .help("Temperature at which annealing which terminate for manual annealing schedule") .default_value("0.01") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceAlphaT, "--alpha_t") + place_grp.add_argument(args.place_alpha_t, "--alpha_t") .help( "Temperature scaling factor for manual annealing schedule." " Old temperature is multiplied by alpha_t") @@ -2260,7 +2260,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .default_value("") .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceAlgorithm, "--place_algorithm") + place_grp.add_argument(args.place_algorithm, "--place_algorithm") .help( "Controls which placement algorithm is used. Valid options:\n" " * bounding_box: Focuses purely on minimizing the bounding box wirelength of the circuit. 
Turns off timing analysis if specified.\n" @@ -2270,7 +2270,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceQuenchAlgorithm, "--place_quench_algorithm") + place_grp.add_argument(args.place_quench_algorithm, "--place_quench_algorithm") .help( "Controls which placement algorithm is used during placement quench.\n" "If specified, it overrides the option --place_algorithm during placement quench.\n" @@ -2282,7 +2282,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .choices({"bounding_box", "criticality_timing", "slack_timing"}) .show_in(argparse::ShowIn::HELP_ONLY); - place_grp.add_argument(args.PlaceChanWidth, "--place_chan_width") + place_grp.add_argument(args.place_chan_width, "--place_chan_width") .help( "Sets the assumed channel width during placement. " "If --place_chan_width is unspecified, but --route_chan_width is specified the " @@ -2484,11 +2484,12 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& place_timing_grp = parser.add_argument_group("timing-driven placement options"); - place_timing_grp.add_argument(args.PlaceTimingTradeoff, "--timing_tradeoff") - .help("Trade-off control between delay and wirelength during placement. " - "0.0 focuses completely on wirelength, 1.0 completely on timing") - .default_value("0.5") - .show_in(argparse::ShowIn::HELP_ONLY); + place_timing_grp.add_argument(args.place_timing_tradeoff, "--timing_tradeoff") + .help( + "Trade-off control between delay and wirelength during placement." + " 0.0 focuses completely on wirelength, 1.0 completely on timing") + .default_value("0.5") + .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor") .help("Weighting factor for congestion cost during placement. 
" @@ -2504,11 +2505,9 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.place_congestion_chan_util_threshold, "--congestion_chan_util_threshold") - .help("Penalizes nets in placement whose average routing channel utilization within their bounding boxes exceeds this threshold.") - .default_value("0.5") - .show_in(argparse::ShowIn::HELP_ONLY); + .help("Penalizes nets in placement whose average routing channel utilization within their bounding boxes exceeds this threshold."); - place_timing_grp.add_argument(args.RecomputeCritIter, "--recompute_crit_iter") + place_timing_grp.add_argument(args.recompute_crit_iter, "--recompute_crit_iter") .help("Controls how many temperature updates occur between timing analysis during placement") .default_value("1") .show_in(argparse::ShowIn::HELP_ONLY); @@ -3467,11 +3466,11 @@ void set_conditional_defaults(t_options& args) { */ //Which placement algorithm to use? 
- if (args.PlaceAlgorithm.provenance() != Provenance::SPECIFIED) { + if (args.place_algorithm.provenance() != Provenance::SPECIFIED) { if (args.timing_analysis) { - args.PlaceAlgorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED); + args.place_algorithm.set(e_place_algorithm::CRITICALITY_TIMING_PLACE, Provenance::INFERRED); } else { - args.PlaceAlgorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED); + args.place_algorithm.set(e_place_algorithm::BOUNDING_BOX_PLACE, Provenance::INFERRED); } } @@ -3485,7 +3484,7 @@ void set_conditional_defaults(t_options& args) { // Check for correct options combinations // If you are running WLdriven placement, the RL reward function should be // either basic or nonPenalizing basic - if (args.RL_agent_placement && (args.PlaceAlgorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) { + if (args.RL_agent_placement && (args.place_algorithm == e_place_algorithm::BOUNDING_BOX_PLACE || !args.timing_analysis)) { if (args.place_reward_fun.value() != "basic" && args.place_reward_fun.value() != "nonPenalizing_basic") { VTR_LOG_WARN( "To use RLPlace for WLdriven placements, the reward function should be basic or nonPenalizing_basic.\n" @@ -3496,18 +3495,18 @@ void set_conditional_defaults(t_options& args) { } //Which placement algorithm to use during placement quench? 
- if (args.PlaceQuenchAlgorithm.provenance() != Provenance::SPECIFIED) { - args.PlaceQuenchAlgorithm.set(args.PlaceAlgorithm, Provenance::INFERRED); + if (args.place_quench_algorithm.provenance() != Provenance::SPECIFIED) { + args.place_quench_algorithm.set(args.place_algorithm, Provenance::INFERRED); } //Place chan width follows Route chan width if unspecified - if (args.PlaceChanWidth.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) { - args.PlaceChanWidth.set(args.RouteChanWidth.value(), Provenance::INFERRED); + if (args.place_chan_width.provenance() != Provenance::SPECIFIED && args.RouteChanWidth.provenance() == Provenance::SPECIFIED) { + args.place_chan_width.set(args.RouteChanWidth.value(), Provenance::INFERRED); } //Do we calculate timing info during placement? - if (args.ShowPlaceTiming.provenance() != Provenance::SPECIFIED) { - args.ShowPlaceTiming.set(args.timing_analysis, Provenance::INFERRED); + if (args.show_place_timing.provenance() != Provenance::SPECIFIED) { + args.show_place_timing.set(args.timing_analysis, Provenance::INFERRED); } //Slave quench recompute divider of inner loop recompute divider unless specified @@ -3516,9 +3515,9 @@ void set_conditional_defaults(t_options& args) { } //Which schedule? 
- if (args.PlaceInitT.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule - || args.PlaceExitT.provenance() == Provenance::SPECIFIED - || args.PlaceAlphaT.provenance() == Provenance::SPECIFIED) { + if (args.place_init_t.provenance() == Provenance::SPECIFIED // Any of these flags select a manual schedule + || args.place_exit_t.provenance() == Provenance::SPECIFIED + || args.place_alpha_t.provenance() == Provenance::SPECIFIED) { args.anneal_sched_type.set(e_sched_type::USER_SCHED, Provenance::INFERRED); } else { args.anneal_sched_type.set(e_sched_type::AUTO_SCHED, Provenance::INFERRED); // Otherwise use the automatic schedule diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index 267dd2ab8cf..c0efe2ed503 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -9,7 +9,7 @@ #include "argparse.hpp" struct t_options { - /* File names */ + // File names argparse::ArgValue ArchFile; argparse::ArgValue CircuitName; argparse::ArgValue NetFile; @@ -49,7 +49,7 @@ struct t_options { argparse::ArgValue write_block_usage; - /* Stage Options */ + // Stage Options argparse::ArgValue do_packing; argparse::ArgValue do_legalize; argparse::ArgValue do_placement; @@ -58,13 +58,13 @@ struct t_options { argparse::ArgValue do_analysis; argparse::ArgValue do_power; - /* Graphics Options */ + // Graphics Options argparse::ArgValue show_graphics; ///eractive graphics? 
argparse::ArgValue GraphPause; argparse::ArgValue save_graphics; argparse::ArgValue graphics_commands; - /* General options */ + // General options argparse::ArgValue show_help; argparse::ArgValue show_version; argparse::ArgValue show_arch_resources; @@ -86,11 +86,11 @@ struct t_options { argparse::ArgValue allow_dangling_combinational_nodes; argparse::ArgValue terminate_if_timing_fails; - /* Server options */ + // Server options argparse::ArgValue is_server_mode_enabled; argparse::ArgValue server_port_num; - /* Atom netlist options */ + // Atom netlist options argparse::ArgValue absorb_buffer_luts; argparse::ArgValue const_gen_inference; argparse::ArgValue sweep_dangling_primary_ios; @@ -99,7 +99,7 @@ struct t_options { argparse::ArgValue sweep_constant_primary_outputs; argparse::ArgValue netlist_verbosity; - /* Analytical Placement options */ + // Analytical Placement options argparse::ArgValue ap_analytical_solver; argparse::ArgValue ap_partial_legalizer; argparse::ArgValue ap_full_legalizer; @@ -111,7 +111,7 @@ struct t_options { argparse::ArgValue ap_high_fanout_threshold; argparse::ArgValue ap_generate_mass_report; - /* Clustering options */ + // Clustering options argparse::ArgValue connection_driven_clustering; argparse::ArgValue allow_unrelated_clustering; argparse::ArgValue timing_gain_weight; @@ -126,19 +126,20 @@ struct t_options { argparse::ArgValue pack_feasible_block_array_size; argparse::ArgValue> pack_high_fanout_threshold; argparse::ArgValue pack_verbosity; - /* Placement options */ - argparse::ArgValue Seed; - argparse::ArgValue ShowPlaceTiming; - argparse::ArgValue PlaceInnerNum; + + // Placement options + argparse::ArgValue seed; + argparse::ArgValue show_place_timing; + argparse::ArgValue place_inner_num; argparse::ArgValue place_auto_init_t_scale; - argparse::ArgValue PlaceInitT; - argparse::ArgValue PlaceExitT; - argparse::ArgValue PlaceAlphaT; + argparse::ArgValue place_init_t; + argparse::ArgValue place_exit_t; + argparse::ArgValue 
place_alpha_t; argparse::ArgValue anneal_sched_type; - argparse::ArgValue PlaceAlgorithm; - argparse::ArgValue PlaceQuenchAlgorithm; + argparse::ArgValue place_algorithm; + argparse::ArgValue place_quench_algorithm; argparse::ArgValue pad_loc_type; - argparse::ArgValue PlaceChanWidth; + argparse::ArgValue place_chan_width; argparse::ArgValue place_rlim_escape_fraction; argparse::ArgValue place_move_stats_file; argparse::ArgValue placement_saves_per_temperature; @@ -167,7 +168,7 @@ struct t_options { argparse::ArgValue placer_debug_block; argparse::ArgValue placer_debug_net; - /*NoC Options*/ + // NoC Options argparse::ArgValue noc; argparse::ArgValue noc_flows_file; argparse::ArgValue noc_routing_algorithm; @@ -185,13 +186,14 @@ struct t_options { argparse::ArgValue noc_sat_routing_log_search_progress; argparse::ArgValue noc_placement_file_name; - /* Timing-driven placement options only */ - argparse::ArgValue PlaceTimingTradeoff; + + + // Timing-driven placement options only argparse::ArgValue place_congestion_factor; argparse::ArgValue place_congestion_rlim_trigger_ratio; argparse::ArgValue place_congestion_chan_util_threshold; - - argparse::ArgValue RecomputeCritIter; + argparse::ArgValue place_timing_tradeoff; + argparse::ArgValue recompute_crit_iter; argparse::ArgValue inner_loop_recompute_divider; argparse::ArgValue quench_recompute_divider; argparse::ArgValue place_exp_first; @@ -206,7 +208,7 @@ struct t_options { argparse::ArgValue place_delay_model_reducer; argparse::ArgValue allowed_tiles_for_delay_model; - /* Router Options */ + // Router Options argparse::ArgValue check_rr_graph; argparse::ArgValue max_router_iterations; argparse::ArgValue first_iter_pres_fac; @@ -236,7 +238,7 @@ struct t_options { argparse::ArgValue route_verbosity; argparse::ArgValue custom_3d_sb_fanin_fanout; - /* Timing-driven router options only */ + // Timing-driven router options only argparse::ArgValue astar_fac; argparse::ArgValue astar_offset; argparse::ArgValue 
router_profiler_astar_fac; @@ -271,7 +273,7 @@ struct t_options { argparse::ArgValue router_initial_timing; argparse::ArgValue router_heap; - /* Analysis options */ + // Analysis options argparse::ArgValue full_stats; argparse::ArgValue Generate_Post_Synthesis_Netlist; argparse::ArgValue Generate_Post_Implementation_Merged_Netlist; diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index 0f973168cde..da0ce231aa6 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -560,17 +560,17 @@ static void setup_router_opts(const t_options& Options, t_router_opts* RouterOpt static void setup_anneal_sched(const t_options& Options, t_annealing_sched* AnnealSched) { - AnnealSched->alpha_t = Options.PlaceAlphaT; + AnnealSched->alpha_t = Options.place_alpha_t; if (AnnealSched->alpha_t >= 1 || AnnealSched->alpha_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "alpha_t must be between 0 and 1 exclusive.\n"); } - AnnealSched->exit_t = Options.PlaceExitT; + AnnealSched->exit_t = Options.place_exit_t; if (AnnealSched->exit_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "exit_t must be greater than 0.\n"); } - AnnealSched->init_t = Options.PlaceInitT; + AnnealSched->init_t = Options.place_init_t; if (AnnealSched->init_t <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater than 0.\n"); } @@ -579,7 +579,7 @@ static void setup_anneal_sched(const t_options& Options, VPR_FATAL_ERROR(VPR_ERROR_OTHER, "init_t must be greater or equal to than exit_t.\n"); } - AnnealSched->inner_num = Options.PlaceInnerNum; + AnnealSched->inner_num = Options.place_inner_num; if (AnnealSched->inner_num <= 0) { VPR_FATAL_ERROR(VPR_ERROR_OTHER, "inner_num must be greater than 0.\n"); } @@ -655,8 +655,8 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->td_place_exp_last = Options.place_exp_last; - PlacerOpts->place_algorithm = Options.PlaceAlgorithm; - PlacerOpts->place_quench_algorithm = Options.PlaceQuenchAlgorithm; + 
PlacerOpts->place_algorithm = Options.place_algorithm; + PlacerOpts->place_quench_algorithm = Options.place_quench_algorithm; PlacerOpts->constraints_file = Options.constraints_file; @@ -666,11 +666,11 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->pad_loc_type = Options.pad_loc_type; - PlacerOpts->place_chan_width = Options.PlaceChanWidth; + PlacerOpts->place_chan_width = Options.place_chan_width; - PlacerOpts->recompute_crit_iter = Options.RecomputeCritIter; - - PlacerOpts->timing_tradeoff = Options.PlaceTimingTradeoff; + PlacerOpts->recompute_crit_iter = Options.recompute_crit_iter; + + PlacerOpts->timing_tradeoff = Options.place_timing_tradeoff; PlacerOpts->congestion_factor = Options.place_congestion_factor; PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio; PlacerOpts->congestion_chan_util_threshold = Options.place_congestion_chan_util_threshold; @@ -722,7 +722,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->floorplan_num_vertical_partitions = Options.floorplan_num_vertical_partitions; PlacerOpts->place_quench_only = Options.place_quench_only; - PlacerOpts->seed = Options.Seed; + PlacerOpts->seed = Options.seed; PlacerOpts->placer_debug_block = Options.placer_debug_block; PlacerOpts->placer_debug_net = Options.placer_debug_net; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index dcd0d2394c9..f76a760b207 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -753,7 +753,7 @@ void vpr_load_packing(const t_vpr_setup& vpr_setup, const t_arch& arch) { // constraints with the new information. 
g_vpr_ctx.mutable_floorplanning().update_floorplanning_context_post_pack(); - /* Sanity check the resulting netlist */ + // Sanity check the resulting netlist check_netlist(vpr_setup.PackerOpts.pack_verbosity); // Independently verify the clusterings to ensure the clustering can be @@ -886,7 +886,7 @@ void vpr_place(const Netlist<>& net_list, arch.directs, g_vpr_ctx.atom().flat_placement_info(), is_flat); - + auto& filename_opts = vpr_setup.FileNameOpts; auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& block_locs = g_vpr_ctx.placement().block_locs(); diff --git a/vpr/src/pack/verify_clustering.cpp b/vpr/src/pack/verify_clustering.cpp index ec08e10a40b..93f925ef68b 100644 --- a/vpr/src/pack/verify_clustering.cpp +++ b/vpr/src/pack/verify_clustering.cpp @@ -406,7 +406,7 @@ unsigned verify_clustering(const ClusteredNetlist& clb_nlist, // Return here since this error can cause serious issues below. return num_errors; } - // Check conssitency between which clusters the atom's think thet are in and + // Check consistency between which clusters the atom's think thet are in and // which atoms the clusters think they have. num_errors += check_clustering_atom_consistency(clb_nlist, atom_nlist, diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index c5d46b5af3f..be5a34d2eb3 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -60,16 +60,16 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? "Cube" : "Per-layer")); VTR_LOG("\n"); - /* To make sure the importance of NoC-related cost terms compared to - * BB and timing cost is determine only through NoC placement weighting factor, - * we normalize NoC-related cost weighting factors so that they add up to 1. 
- * With this normalization, NoC-related cost weighting factors only determine - * the relative importance of NoC cost terms with respect to each other, while - * the importance of total NoC cost to conventional placement cost is determined - * by NoC placement weighting factor. - * FIXME: This should not be modifying the NoC Opts here, this normalization - * should occur when these Opts are loaded in. - */ + + // To make sure the importance of NoC-related cost terms compared to + // BB and timing cost is determine only through NoC placement weighting factor, + // we normalize NoC-related cost weighting factors so that they add up to 1. + // With this normalization, NoC-related cost weighting factors only determine + // the relative importance of NoC cost terms with respect to each other, while + // the importance of total NoC cost to conventional placement cost is determined + // by NoC placement weighting factor. + // FIXME: This should not be modifying the NoC Opts here, this normalization + // should occur when these Opts are loaded in. if (noc_opts.noc) { normalize_noc_cost_weighting_factor(const_cast(noc_opts)); } @@ -95,10 +95,9 @@ void try_place(const Netlist<>& net_list, } } - /* Make the global instance of BlkLocRegistry inaccessible through the getter methods of the - * placement context. This is done to make sure that the placement stage only accesses its - * own local instances of BlkLocRegistry. - */ + // Make the global instance of BlkLocRegistry inaccessible through the getter methods of the + // placement context. This is done to make sure that the placement stage only accesses its + // own local instances of BlkLocRegistry. mutable_placement.lock_loc_vars(); /* Start measuring placement time. The measured execution time will be printed @@ -154,7 +153,7 @@ static void update_screen_debug(); //Performs a major (i.e. interactive) placement screen update. 
//This function with no arguments is useful for calling from a debugger to -//look at the intermediate implemetnation state. +//look at the intermediate implementation state. static void update_screen_debug() { update_screen(ScreenUpdatePriority::MAJOR, "DEBUG", PLACEMENT, nullptr); } From 12b597944d6098c9e855ecdc29db246597cfe6a4 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 16:35:59 -0400 Subject: [PATCH 64/66] make format --- vpr/src/base/read_options.cpp | 10 +++++----- vpr/src/base/read_options.h | 2 -- vpr/src/base/setup_vpr.cpp | 2 +- vpr/src/base/vpr_api.cpp | 2 +- vpr/src/place/place.cpp | 1 - 5 files changed, 7 insertions(+), 10 deletions(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 912455aabfc..d0d588ef424 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2485,11 +2485,11 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio auto& place_timing_grp = parser.add_argument_group("timing-driven placement options"); place_timing_grp.add_argument(args.place_timing_tradeoff, "--timing_tradeoff") - .help( - "Trade-off control between delay and wirelength during placement." - " 0.0 focuses completely on wirelength, 1.0 completely on timing") - .default_value("0.5") - .show_in(argparse::ShowIn::HELP_ONLY); + .help( + "Trade-off control between delay and wirelength during placement." + " 0.0 focuses completely on wirelength, 1.0 completely on timing") + .default_value("0.5") + .show_in(argparse::ShowIn::HELP_ONLY); place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor") .help("Weighting factor for congestion cost during placement. 
" diff --git a/vpr/src/base/read_options.h b/vpr/src/base/read_options.h index c0efe2ed503..780532cb165 100644 --- a/vpr/src/base/read_options.h +++ b/vpr/src/base/read_options.h @@ -186,8 +186,6 @@ struct t_options { argparse::ArgValue noc_sat_routing_log_search_progress; argparse::ArgValue noc_placement_file_name; - - // Timing-driven placement options only argparse::ArgValue place_congestion_factor; argparse::ArgValue place_congestion_rlim_trigger_ratio; diff --git a/vpr/src/base/setup_vpr.cpp b/vpr/src/base/setup_vpr.cpp index da0ce231aa6..ecc4a1706d6 100644 --- a/vpr/src/base/setup_vpr.cpp +++ b/vpr/src/base/setup_vpr.cpp @@ -669,7 +669,7 @@ static void setup_placer_opts(const t_options& Options, t_placer_opts* PlacerOpt PlacerOpts->place_chan_width = Options.place_chan_width; PlacerOpts->recompute_crit_iter = Options.recompute_crit_iter; - + PlacerOpts->timing_tradeoff = Options.place_timing_tradeoff; PlacerOpts->congestion_factor = Options.place_congestion_factor; PlacerOpts->congestion_rlim_trigger_ratio = Options.place_congestion_rlim_trigger_ratio; diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index f76a760b207..f2eb38e3927 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -886,7 +886,7 @@ void vpr_place(const Netlist<>& net_list, arch.directs, g_vpr_ctx.atom().flat_placement_info(), is_flat); - + auto& filename_opts = vpr_setup.FileNameOpts; auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& block_locs = g_vpr_ctx.placement().block_locs(); diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index be5a34d2eb3..7d7462ed707 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -60,7 +60,6 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? 
"Cube" : "Per-layer")); VTR_LOG("\n"); - // To make sure the importance of NoC-related cost terms compared to // BB and timing cost is determine only through NoC placement weighting factor, // we normalize NoC-related cost weighting factors so that they add up to 1. From c816b60225f01bec33d9840b7dcf2a9a286285d7 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 16:37:55 -0400 Subject: [PATCH 65/66] make format --- vpr/src/base/vpr_api.cpp | 1 - vpr/src/base/vpr_types.h | 1 - vpr/src/place/place.cpp | 1 - 3 files changed, 3 deletions(-) diff --git a/vpr/src/base/vpr_api.cpp b/vpr/src/base/vpr_api.cpp index d3a8ec901c3..17c6df68327 100644 --- a/vpr/src/base/vpr_api.cpp +++ b/vpr/src/base/vpr_api.cpp @@ -887,7 +887,6 @@ void vpr_place(const Netlist<>& net_list, g_vpr_ctx.atom().flat_placement_info(), is_flat); - auto& filename_opts = vpr_setup.FileNameOpts; auto& cluster_ctx = g_vpr_ctx.clustering(); const auto& block_locs = g_vpr_ctx.placement().block_locs(); diff --git a/vpr/src/base/vpr_types.h b/vpr/src/base/vpr_types.h index e9c49f4f281..70449e6fb47 100644 --- a/vpr/src/base/vpr_types.h +++ b/vpr/src/base/vpr_types.h @@ -382,7 +382,6 @@ enum class e_sched_type { USER_SCHED }; - // What's on screen? enum pic_type { NO_PICTURE, diff --git a/vpr/src/place/place.cpp b/vpr/src/place/place.cpp index 6133c89fff1..9e573623385 100644 --- a/vpr/src/place/place.cpp +++ b/vpr/src/place/place.cpp @@ -63,7 +63,6 @@ void try_place(const Netlist<>& net_list, VTR_LOG("Bounding box mode is %s\n", (mutable_placement.cube_bb ? "Cube" : "Per-layer")); VTR_LOG("\n"); - // To make sure the importance of NoC-related cost terms compared to // BB and timing cost is determine only through NoC placement weighting factor, // we normalize NoC-related cost weighting factors so that they add up to 1. 
From 3bf72daec0cce2a41df07b65eff7745a357102c9 Mon Sep 17 00:00:00 2001 From: Soheil Shahrouz Date: Mon, 21 Jul 2025 17:18:17 -0400 Subject: [PATCH 66/66] fix the issue with choices for --place_frequency --- vpr/src/base/read_options.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vpr/src/base/read_options.cpp b/vpr/src/base/read_options.cpp index 2d5b5d39ba3..e13c7768b17 100644 --- a/vpr/src/base/read_options.cpp +++ b/vpr/src/base/read_options.cpp @@ -2378,7 +2378,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio place_grp.add_argument(args.place_placement_freq, "--place_frequency") .help("Run placement every time or only once during channel width search.") .default_value("once") - .choices({"once, always"}) + .choices({"once", "always"}) .show_in(argparse::ShowIn::HELP_ONLY); place_grp.add_argument(args.RL_agent_placement, "--RL_agent_placement") @@ -2527,7 +2527,7 @@ argparse::ArgumentParser create_arg_parser(const std::string& prog_name, t_optio " 0.0 focuses completely on wirelength, 1.0 completely on timing") .default_value("0.5") .show_in(argparse::ShowIn::HELP_ONLY); - + place_timing_grp.add_argument(args.place_congestion_factor, "--congestion_factor") .help("Weighting factor for congestion cost during placement. " "Higher values prioritize congestion avoidance over bounding box and timing costs. "