| 
 | 1 | +#!/usr/bin/env python3  | 
 | 2 | + | 
"""This module generates the configuration for, and parses the results of,
large runs that sweep multiple parameters."""
 | 5 | +import itertools  | 
 | 6 | +import os  | 
 | 7 | +import sys  | 
 | 8 | +import csv  | 
 | 9 | +import pandas as pd  | 
 | 10 | +import numpy as np  | 
 | 11 | +from scipy import stats  | 
 | 12 | + | 
# Parameters to sweep: each key is a command-line option name and each value is
# the list of values to try for it. generate_combinations() emits one config
# line per element of the Cartesian product of these lists, and the keys are
# used as column names in the generated CSV files.
PARAMS_DICT = {
    "--seed": [1, 2],
    "--place_algorithm": ["criticality_timing"],
    "--place_agent_epsilon": [0.3],
}

# When True, generate_avg_seed_csv() keeps only the "circuit" and "arch"
# columns, the PARAMS_DICT keys and the metrics listed in PARSED_METRICS.
KEEP_METRICS_ONLY = True
PARSED_METRICS = ["num_io", "num_LAB"]
 | 23 | + | 
 | 24 | + | 
 | 25 | +def safe_gmean(series):  | 
 | 26 | +    """Calculate the geomeans of a series in a safe way even for large numbers"""  | 
 | 27 | +    series = series.replace({0: np.nan})  | 
 | 28 | +    return stats.gmean(series.dropna())  | 
 | 29 | + | 
 | 30 | + | 
 | 31 | +def generate_combinations():  | 
 | 32 | +    """Generates all the parameter combinations between the input parameters values."""  | 
 | 33 | +    keys = list(PARAMS_DICT.keys())  | 
 | 34 | +    values = list(PARAMS_DICT.values())  | 
 | 35 | +    combinations = list(itertools.product(*values))  | 
 | 36 | + | 
 | 37 | +    lines = []  | 
 | 38 | +    for combination in combinations:  | 
 | 39 | +        params_str = " ".join(f"{key} {value}" for key, value in zip(keys, combination))  | 
 | 40 | +        lines.append(f"script_params_list_add={params_str}\n")  | 
 | 41 | +    return lines  | 
 | 42 | + | 
 | 43 | + | 
 | 44 | +def parse_results(input_path):  | 
 | 45 | +    """  | 
 | 46 | +    Parse the output results  | 
 | 47 | +    """  | 
 | 48 | +    # Find the runXXX directory with the largest XXX  | 
 | 49 | +    run_dirs = [  | 
 | 50 | +        d for d in os.listdir(input_path) if d.startswith("run") and d[3:].isdigit()  | 
 | 51 | +    ]  | 
 | 52 | +    if not run_dirs:  | 
 | 53 | +        print("No runXXX directories found in the specified input path.")  | 
 | 54 | +        sys.exit(1)  | 
 | 55 | + | 
 | 56 | +    largest_run_path = os.path.join(input_path, max(run_dirs, key=lambda d: int(d[3:])))  | 
 | 57 | + | 
 | 58 | +    # Path to parse_results.txt and full_res.csv  | 
 | 59 | +    full_res_csv_path = os.path.join(largest_run_path, "full_res.csv")  | 
 | 60 | + | 
 | 61 | +    if not os.path.exists(os.path.join(largest_run_path, "parse_results.txt")):  | 
 | 62 | +        print("File parse_results.txt not found.")  | 
 | 63 | +        sys.exit(1)  | 
 | 64 | + | 
 | 65 | +    # Read the parse_results.txt file and write to full_res.csv  | 
 | 66 | +    with open(  | 
 | 67 | +        os.path.join(largest_run_path, "parse_results.txt"), "r"  | 
 | 68 | +    ) as txt_file, open(full_res_csv_path, "w", newline="") as csv_file:  | 
 | 69 | +        reader = csv.reader(txt_file, delimiter="\t")  | 
 | 70 | +        writer = csv.writer(csv_file)  | 
 | 71 | + | 
 | 72 | +        headers = next(reader)  | 
 | 73 | +        script_params_index = headers.index("script_params")  | 
 | 74 | + | 
 | 75 | +        # Create new headers with PARAMS_DICT keys  | 
 | 76 | +        new_headers = (  | 
 | 77 | +            headers[:script_params_index]  | 
 | 78 | +            + list(PARAMS_DICT.keys())  | 
 | 79 | +            + headers[script_params_index + 1 :]  | 
 | 80 | +        )  | 
 | 81 | +        writer.writerow(new_headers)  | 
 | 82 | + | 
 | 83 | +        for row in reader:  | 
 | 84 | +            script_params_value = row[script_params_index]  | 
 | 85 | +            script_params_dict = parse_script_params(script_params_value)  | 
 | 86 | +            new_row = (  | 
 | 87 | +                row[:script_params_index]  | 
 | 88 | +                + [script_params_dict.get(key, "") for key in PARAMS_DICT]  | 
 | 89 | +                + row[script_params_index + 1 :]  | 
 | 90 | +            )  | 
 | 91 | +            writer.writerow(new_row)  | 
 | 92 | + | 
 | 93 | +    print(f"Converted parse_results.txt to {full_res_csv_path}")  | 
 | 94 | + | 
 | 95 | +    # Generate avg_seed.csv if --seed column exists  | 
 | 96 | +    generate_avg_seed_csv(full_res_csv_path, largest_run_path)  | 
 | 97 | +    print("Generated average seed results")  | 
 | 98 | + | 
 | 99 | +    # Generate gmean_res.csv  | 
 | 100 | +    generate_geomean_res_csv(  | 
 | 101 | +        os.path.join(largest_run_path, "avg_seed.csv"), largest_run_path  | 
 | 102 | +    )  | 
 | 103 | +    print("Generated geometric average results over all the circuits")  | 
 | 104 | + | 
 | 105 | +    generate_xlsx(largest_run_path)  | 
 | 106 | +    print("Generated xlsx that merges all the result csv files")  | 
 | 107 | + | 
 | 108 | + | 
 | 109 | +def generate_xlsx(largest_run_path):  | 
 | 110 | +    """Generate a xlsx file that includes the full results, average results over the seed  | 
 | 111 | +    and the geometrically averaged results over all the benchmarks."""  | 
 | 112 | + | 
 | 113 | +    csv_files = [  | 
 | 114 | +        os.path.join(largest_run_path, "full_res.csv"),  | 
 | 115 | +        os.path.join(largest_run_path, "avg_seed.csv"),  | 
 | 116 | +        os.path.join(largest_run_path, "geomean_res.csv"),  | 
 | 117 | +    ]  | 
 | 118 | +    sheet_names = ["Full res", "Avg. seeds", "Summary"]  | 
 | 119 | +    output_excel_file = os.path.join(largest_run_path, "summary.xlsx")  | 
 | 120 | +    # Create an Excel writer object  | 
 | 121 | +    # pylint: disable=abstract-class-instantiated  | 
 | 122 | +    with pd.ExcelWriter(output_excel_file, engine="xlsxwriter") as writer:  | 
 | 123 | +        for csv_file, sheet_name in zip(csv_files, sheet_names):  | 
 | 124 | +            # Read each CSV file  | 
 | 125 | +            df = pd.read_csv(csv_file)  | 
 | 126 | + | 
 | 127 | +            # Write each DataFrame to a different sheet  | 
 | 128 | +            df.to_excel(writer, sheet_name=sheet_name, index=False)  | 
 | 129 | + | 
 | 130 | + | 
 | 131 | +def parse_script_params(script_params):  | 
 | 132 | +    """Helper function to parse the script params values from earch row in  | 
 | 133 | +    the parse_results.txt"""  | 
 | 134 | + | 
 | 135 | +    parsed_params = {key: "" for key in PARAMS_DICT}  | 
 | 136 | + | 
 | 137 | +    parts = script_params.split("_")  | 
 | 138 | +    i = 0  | 
 | 139 | + | 
 | 140 | +    while i < len(parts):  | 
 | 141 | +        for key in PARAMS_DICT:  | 
 | 142 | +            key_parts = key.split("_")  | 
 | 143 | +            key_length = len(key_parts)  | 
 | 144 | + | 
 | 145 | +            if parts[i : i + key_length] == key_parts:  | 
 | 146 | +                value_parts = []  | 
 | 147 | +                j = i + key_length  | 
 | 148 | + | 
 | 149 | +                while j < len(parts) and not any(  | 
 | 150 | +                    parts[j : j + len(k.split("_"))] == k.split("_")  | 
 | 151 | +                    for k in PARAMS_DICT  | 
 | 152 | +                ):  | 
 | 153 | +                    value_parts.append(parts[j])  | 
 | 154 | +                    j += 1  | 
 | 155 | + | 
 | 156 | +                parsed_params[key] = "_".join(value_parts)  | 
 | 157 | +                i = j - 1  | 
 | 158 | +                break  | 
 | 159 | + | 
 | 160 | +        i += 1  | 
 | 161 | + | 
 | 162 | +    return parsed_params  | 
 | 163 | + | 
 | 164 | + | 
 | 165 | +def generate_avg_seed_csv(full_res_csv_path, output_dir):  | 
 | 166 | +    """Generate the average results over the seeds"""  | 
 | 167 | +    df = pd.read_csv(full_res_csv_path)  | 
 | 168 | +    assert isinstance(df, pd.DataFrame)  | 
 | 169 | + | 
 | 170 | +    if KEEP_METRICS_ONLY:  | 
 | 171 | +        col_to_keep = ["circuit", "arch"]  | 
 | 172 | +        col_to_keep.extend(list(PARAMS_DICT.keys()))  | 
 | 173 | +        col_to_keep.extend(PARSED_METRICS)  | 
 | 174 | +        df = df.drop(  | 
 | 175 | +            # pylint: disable=no-member  | 
 | 176 | +            columns=[col for col in df.columns if col not in col_to_keep]  | 
 | 177 | +        )  | 
 | 178 | + | 
 | 179 | +    # Check if '--seed' column is present  | 
 | 180 | +    if "--seed" in df.columns:  | 
 | 181 | +        # Determine the grouping keys: ['circuit', 'arch'] + keys from PARAMS_DICT that  | 
 | 182 | +        # are present in the dataframe  | 
 | 183 | +        grouping_keys = ["circuit", "arch"] + [  | 
 | 184 | +            key for key in PARAMS_DICT if key in df.columns and key != "--seed"  | 
 | 185 | +        ]  | 
 | 186 | + | 
 | 187 | +        # Group by specified keys and compute the mean for numeric columns  | 
 | 188 | +        df_grouped = df.groupby(grouping_keys).mean(numeric_only=True).reset_index()  | 
 | 189 | + | 
 | 190 | +        # Drop the '--seed' column if it exists  | 
 | 191 | +        if "--seed" in df_grouped.columns:  | 
 | 192 | +            df_grouped.drop(columns=["--seed"], inplace=True)  | 
 | 193 | +    else:  | 
 | 194 | +        df_grouped = df  | 
 | 195 | + | 
 | 196 | +    # Save the resulting dataframe to a CSV file  | 
 | 197 | +    avg_seed_csv_path = os.path.join(output_dir, "avg_seed.csv")  | 
 | 198 | +    df_grouped.to_csv(avg_seed_csv_path, index=False)  | 
 | 199 | + | 
 | 200 | + | 
 | 201 | +def generate_geomean_res_csv(full_res_csv_path, output_dir):  | 
 | 202 | +    """Generate the geometric average results over the different circuits"""  | 
 | 203 | + | 
 | 204 | +    df = pd.read_csv(full_res_csv_path)  | 
 | 205 | + | 
 | 206 | +    param_columns = [key for key in PARAMS_DICT if key != "--seed"]  | 
 | 207 | +    non_param_columns = [col for col in df.columns if col not in param_columns]  | 
 | 208 | + | 
 | 209 | +    # pylint: disable=no-member  | 
 | 210 | +    geomean_df = (  | 
 | 211 | +        df.groupby(param_columns)  | 
 | 212 | +        .agg(  | 
 | 213 | +            {  | 
 | 214 | +                col: (lambda x: "" if x.dtype == "object" else safe_gmean(x))  | 
 | 215 | +                for col in non_param_columns  | 
 | 216 | +            }  | 
 | 217 | +        )  | 
 | 218 | +        .reset_index()  | 
 | 219 | +    )  | 
 | 220 | + | 
 | 221 | +    geomean_df.drop(columns=["circuit"], inplace=True)  | 
 | 222 | +    geomean_df.drop(columns=["arch"], inplace=True)  | 
 | 223 | + | 
 | 224 | +    geomean_res_csv_path = os.path.join(output_dir, "geomean_res.csv")  | 
 | 225 | +    geomean_df.to_csv(geomean_res_csv_path, index=False)  | 
 | 226 | + | 
 | 227 | + | 
 | 228 | +def main():  | 
 | 229 | +    """Main function"""  | 
 | 230 | + | 
 | 231 | +    if len(sys.argv) < 3:  | 
 | 232 | +        print("Usage: script.py <option> <path_to_directory>")  | 
 | 233 | +        sys.exit(1)  | 
 | 234 | + | 
 | 235 | +    option = sys.argv[1]  | 
 | 236 | +    directory_path = sys.argv[2]  | 
 | 237 | + | 
 | 238 | +    if option == "--generate":  | 
 | 239 | +        # Generate the combinations  | 
 | 240 | +        lines = generate_combinations()  | 
 | 241 | + | 
 | 242 | +        # Define the path to the config file  | 
 | 243 | +        config_path = os.path.join(directory_path, "config", "config.txt")  | 
 | 244 | + | 
 | 245 | +        # Ensure the config directory exists  | 
 | 246 | +        os.makedirs(os.path.dirname(config_path), exist_ok=True)  | 
 | 247 | + | 
 | 248 | +        # Append the lines to the config file  | 
 | 249 | +        with open(config_path, "a") as file:  | 
 | 250 | +            file.writelines(lines)  | 
 | 251 | + | 
 | 252 | +        print(f"Appended lines to {config_path}")  | 
 | 253 | + | 
 | 254 | +    elif option == "--parse":  | 
 | 255 | +        parse_results(directory_path)  | 
 | 256 | + | 
 | 257 | +    else:  | 
 | 258 | +        print("Invalid option. Use --generate or --parse")  | 
 | 259 | +        sys.exit(1)  | 
 | 260 | + | 
 | 261 | + | 
# Run the command-line entry point only when executed as a script, not on import.
if __name__ == "__main__":
    main()
0 commit comments