From db9f217f95ae268c9a92bbb065296a351cea76d3 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Tue, 15 Jul 2025 13:22:31 -0300
Subject: [PATCH 01/20] hf_mode wip

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 82 +++++++++++++++++++-----------
 1 file changed, 52 insertions(+), 30 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 48d41f62..70787411 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -11,6 +11,8 @@
 from fms.models import get_model
 from fms.utils.generation import generate
 
+from transformers import AutoModelForCausalLM, AutoTokenizer
+
 from aiu_fms_testing_utils.testing.validation import get_default_validation_prefix
 from aiu_fms_testing_utils.utils import prepare_inputs
@@ -149,22 +151,27 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
 
     if "generate" in mode:
         with torch.no_grad():
-            result = generate(
-                model,
-                ids,
-                max_new_tokens=max_new_tokens,
-                use_cache=use_cache,
-                do_sample=do_sample,
-                max_seq_len=max_seq_len,
-                timing="e2e",
-                eos_token_id=None,
-                contiguous_cache=True,
-                extra_kwargs={},
-            )
-            result, timings = result
-            logger.info(f"Generation completed: Result len is {len(result)}")
-            if len(result.shape) == 1:
-                result = result.unsqueeze(0)
+            # result = generate(
+            #     model,
+            #     ids,
+            #     max_new_tokens=max_new_tokens,
+            #     use_cache=use_cache,
+            #     do_sample=do_sample,
+            #     max_seq_len=max_seq_len,
+            #     timing="e2e",
+            #     eos_token_id=None,
+            #     contiguous_cache=True,
+            #     extra_kwargs={},
+            # )
+            # result, timings = result
+            # logger.info(f"Generation completed: Result len is {len(result)}")
+            # if len(result.shape) == 1:
+            #     result = result.unsqueeze(0)
+            model.generate(**ids,
+                           max_length=max_seq_len,
+                           max_new_tokens=max_new_token,
+                           do_sample=do_sample,
+                           use_cache=use_cache)
     else:
         result = model.forward(
             ids,
@@ -334,21 +341,36 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
 
     tokenizer = tokenizers.get_tokenizer(model_path)
 
-    # prepare the cpu model
-    validation_model = get_model(
-        device_type="cpu",
-        data_type=torch.float32,
-        fused_weights=False,
-        **get_model_kwargs,
-    )
+    device = "auto"
+    model_path = "ibm-granite/granite-3.3-8b-base"
+    tokenizer = AutoTokenizer.from_pretrained(model_path)
 
-    # prepare the cuda model
-    validation_model_cuda = get_model(
-        device_type="cuda",
-        data_type=torch.float16,
-        fused_weights=False,
-        **get_model_kwargs,
-    )
+    # drop device_map if running on CPU
+    validation_model = AutoModelForCausalLM.from_pretrained(model_path,
+                                                            device_map="cpu",
+                                                            torch_dtype=torch.float32
+                                                            )
+
+    validation_model_cuda = AutoModelForCausalLM.from_pretrained(model_path,
+                                                                 device_map="cuda",
+                                                                 torch_dtype=torch.float16
+                                                                 )
+
+    # prepare the cpu model
+    # validation_model = get_model(
+    #     device_type="cpu",
+    #     data_type=torch.float32,
+    #     fused_weights=False,
+    #     **get_model_kwargs,
+    # )
+
+    # # prepare the cuda model
+    # validation_model_cuda = get_model(
+    #     device_type="cuda",
+    #     data_type=torch.float16,
+    #     fused_weights=False,
+    #     **get_model_kwargs,
+    # )
 
     layer_stack_cpu = __register_call_layers(model=validation_model,
                                              batch_size=batch_size,

From 894ebd63429bd8fce959516ae6f40eb285cc1d73 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Tue, 15 Jul 2025 14:19:29 -0300
Subject: [PATCH 02/20] Adds arg parse for loader mode

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 115 ++++++++++++++++-------------
 1 file changed, 62 insertions(+), 53 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 70787411..639b885f 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -49,6 +49,13 @@
     required=True,
     help="Sets the output generation mode."
 )
+parser.add_argument(
+    "--model_loader",
+    choices=["fms", "hf"],
+    default="fms",
+    required=True,
+    help="Which model loader/runner to be used; fms - IBM's Foundation Model Stack or hf - HuggingFace Transformers."
+)
 parser.add_argument(
     "--batch_sizes",
     type=str,
@@ -136,6 +143,7 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
 
     do_sample = False
     use_cache = True
+    result = None
 
     prompts = prepare_inputs(batch_size, max_len, tokenizer, sharegpt_path)
     ids, pad_input_ids = prompts
@@ -151,27 +159,29 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
 
     if "generate" in mode:
         with torch.no_grad():
-            # result = generate(
-            #     model,
-            #     ids,
-            #     max_new_tokens=max_new_tokens,
-            #     use_cache=use_cache,
-            #     do_sample=do_sample,
-            #     max_seq_len=max_seq_len,
-            #     timing="e2e",
-            #     eos_token_id=None,
-            #     contiguous_cache=True,
-            #     extra_kwargs={},
-            # )
-            # result, timings = result
-            # logger.info(f"Generation completed: Result len is {len(result)}")
-            # if len(result.shape) == 1:
-            #     result = result.unsqueeze(0)
-            model.generate(**ids,
-                           max_length=max_seq_len,
-                           max_new_tokens=max_new_token,
-                           do_sample=do_sample,
-                           use_cache=use_cache)
+            if args.model_loader == "fms":
+                result = generate(
+                    model,
+                    ids,
+                    max_new_tokens=max_new_tokens,
+                    use_cache=use_cache,
+                    do_sample=do_sample,
+                    max_seq_len=max_seq_len,
+                    timing="e2e",
+                    eos_token_id=None,
+                    contiguous_cache=True,
+                    extra_kwargs={},
+                )
+            if args.model_loader == "hf":
+                result = model.generate(ids,
+                                        max_length=max_len,
+                                        max_new_tokens=max_new_token,
+                                        do_sample=do_sample,
+                                        use_cache=use_cache)
+            result, timings = result
+            logger.info(f"Generation completed: Result len is {len(result)}")
+            if len(result.shape) == 1:
+                result = result.unsqueeze(0)
     else:
         result = model.forward(
             ids,
@@ -339,38 +349,37 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
         **micro_model_kwargs,
     }
 
-    tokenizer = tokenizers.get_tokenizer(model_path)
-
-    device = "auto"
-    model_path = "ibm-granite/granite-3.3-8b-base"
-    tokenizer = AutoTokenizer.from_pretrained(model_path)
-
-    # drop device_map if running on CPU
-    validation_model = AutoModelForCausalLM.from_pretrained(model_path,
-                                                            device_map="cpu",
-                                                            torch_dtype=torch.float32
-                                                            )
-
-    validation_model_cuda = AutoModelForCausalLM.from_pretrained(model_path,
-                                                                 device_map="cuda",
-                                                                 torch_dtype=torch.float16
-                                                                 )
-
-    # prepare the cpu model
-    # validation_model = get_model(
-    #     device_type="cpu",
-    #     data_type=torch.float32,
-    #     fused_weights=False,
-    #     **get_model_kwargs,
-    # )
-
-    # # prepare the cuda model
-    # validation_model_cuda = get_model(
-    #     device_type="cuda",
-    #     data_type=torch.float16,
-    #     fused_weights=False,
-    #     **get_model_kwargs,
-    # )
+    if args.model_loader == "hf":
+        tokenizer = AutoTokenizer.from_pretrained(model_path)
+
+        # prepare the cpu model
+        validation_model = AutoModelForCausalLM.from_pretrained(model_path,
+                                                                device_map="cpu",
+                                                                torch_dtype=torch.float32
+                                                                )
+        # prepare the cuda model
+        validation_model_cuda = AutoModelForCausalLM.from_pretrained(model_path,
+                                                                     device_map="cuda",
+                                                                     torch_dtype=torch.float16
+                                                                     )
+    if args.model_loader == "fms":
+        tokenizer = tokenizers.get_tokenizer(model_path)
+
+        # prepare the cpu model
+        validation_model = get_model(
+            device_type="cpu",
+            data_type=torch.float32,
+            fused_weights=False,
+            **get_model_kwargs,
+        )
+
+        # prepare the cuda model
+        validation_model_cuda = get_model(
+            device_type="cuda",
+            data_type=torch.float16,
+            fused_weights=False,
+            **get_model_kwargs,
+        )
 
     layer_stack_cpu = __register_call_layers(model=validation_model,
                                              batch_size=batch_size,

From 719f29dd56d1df399a6ca9006cebea293276b3b6 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Tue, 15 Jul 2025 14:25:42 -0300
Subject: [PATCH 03/20] Max length for hf fixed

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 639b885f..2ab49f24 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -151,7 +151,9 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
     if "cuda" in device:
         ids = ids.to("cuda")
 
-    if hasattr(model.config, "ntk_scaling") and model.config.ntk_scaling:
+    if args.model_loader == "hf":
+        max_seq_len = max_len
+    elif hasattr(model.config, "ntk_scaling") and model.config.ntk_scaling:
         max_seq_len = max(max_len, model.config.max_expected_seq_len)
     else:
         # without ntk scaling, extending the seq length too far gives bogus results.
@@ -174,7 +176,7 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
             if args.model_loader == "hf":
                 result = model.generate(ids,
-                                        max_length=max_len,
+                                        max_length=max_seq_len,
                                         max_new_tokens=max_new_token,
                                         do_sample=do_sample,
                                         use_cache=use_cache)

From da5696c39678808c4097a4104b1c0d487094df40 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Tue, 15 Jul 2025 14:37:31 -0300
Subject: [PATCH 04/20] Adds arg usage to docs

Signed-off-by: Flavia Beo
---
 tests/LAYERS.md | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/tests/LAYERS.md b/tests/LAYERS.md
index ae1ccde0..42389d66 100644
--- a/tests/LAYERS.md
+++ b/tests/LAYERS.md
@@ -19,7 +19,7 @@ The idea is to run, the prompts through the model with the pre- and post-hooks a
 The script [generate_layers_metrics.py](../scripts/generate_layers_metrics.py) requires the following arguments to be run:
 
 ```bash
-usage: generate_layers_metrics.py [-h] [--architecture ARCHITECTURE] [--variant VARIANT] [--model_path MODEL_PATH] --mode {generate,model-forward} --batch_sizes BATCH_SIZES --seq_lengths SEQ_LENGTHS --max_new_tokens MAX_NEW_TOKENS [--output_path OUTPUT_PATH] [--sharegpt_path SHAREGPT_PATH]
+usage: generate_layers_metrics.py [-h] [--architecture ARCHITECTURE] [--variant VARIANT] [--model_path MODEL_PATH] --mode {generate,model-forward} --model_loader {fms,hf} --batch_sizes BATCH_SIZES --seq_lengths SEQ_LENGTHS --max_new_tokens MAX_NEW_TOKENS [--output_path OUTPUT_PATH] [--sharegpt_path SHAREGPT_PATH]
 
 Script to generate the model's metrics by layer
 
 options:
   -h, --help            show this help message and exit
   --architecture ARCHITECTURE
                         The model architecture Eg.: hf_pretrained
   --variant VARIANT     The model variants (configuration) to benchmark. E.g. 7b, 13b, 70b.
   --model_path MODEL_PATH
                         Paths to the directory containing model's weights (.pth files sharded by tensor parallel rank, not HF weights)
   --mode {generate,model-forward}
                         Sets the output generation mode.
+  --model_loader {fms,hf}
+                        Which model loader/runner to be used; fms - IBM's Foundation Model Stack or hf - HuggingFace Transformers.
   --batch_sizes BATCH_SIZES
                         Batch sizes separated by comma. Eg.: 1,2
   --seq_lengths SEQ_LENGTHS
@@ -79,7 +81,7 @@ cd aiu-fms-testing-utils/tests/resources
 
 mkdir /tmp/output
 
-python3 generate_layers_metrics.py --mode model-forward --variant ibm-granite/granite-3.2-8b-instruct --architecture hf_pretrained --batch_sizes 1 --seq_lengths 64 --max_new_tokens 128
+python3 generate_layers_metrics.py --mode model-forward --variant ibm-granite/granite-3.2-8b-instruct --architecture hf_pretrained --batch_sizes 1 --seq_lengths 64 --max_new_tokens 128 --model_loader fms
 ```
 The files should get created at `/tmp/output` dir:
 ```bash
@@ -95,7 +97,7 @@
 To get the second step of the flow and get the thresholds by layer, run:
 ```bash
 cd /aiu-fms-testing-utils/tests/resources
-python3 get_thresholds.py --models ibm-granite/granite-3.2-8b-instruct --metrics abs_diff cos_sim_avg cos_sim_men --file_base /tmp/output --layer_io
+python3 get_thresholds.py --models ibm-granite/granite-3.2-8b-instruct --metrics abs_diff cos_sim_avg cos_sim_mean --file_base /tmp/output --layer_io
 ```
 It should print the metric of each layer:
 ```bash

From 3d30a85cee9b4fe32ea595f94dff2d94a64fe8b3 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Tue, 15 Jul 2025 15:19:06 -0300
Subject: [PATCH 05/20] Fix result for fms generate inference

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 2ab49f24..91f1c66c 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -174,13 +174,13 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
                     contiguous_cache=True,
                     extra_kwargs={},
                 )
+                result, timings = result
             if args.model_loader == "hf":
                 result = model.generate(ids,
                                         max_length=max_seq_len,
                                         max_new_tokens=max_new_token,
                                         do_sample=do_sample,
                                         use_cache=use_cache)
-            result, timings = result
             logger.info(f"Generation completed: Result len is {len(result)}")
             if len(result.shape) == 1:
                 result = result.unsqueeze(0)

From d96367d121bb214fc9201655e990ee3f8acd5c43 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 17 Jul 2025 15:13:19 -0300
Subject: [PATCH 06/20] save output layers to files

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 91f1c66c..60f5e60a 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -389,6 +389,8 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                                              seq_length=seq_length, max_new_tokens=max_new_tokens,
                                              tokenizer=tokenizer)
 
+    torch.save(layer_stack_cpu, os.path.join(output_path, "layers-output", "layer_stack_cpu.pt"))
+
     global generate_iters
     generate_iters = 0
     logger.info(f"Finished registering CPU layers")
@@ -398,6 +400,8 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                                               device="cuda",
                                               seq_length=seq_length, max_new_tokens=max_new_tokens,
                                               tokenizer=tokenizer)
+
+    torch.save(layer_stack_cuda, os.path.join(output_path, "layers-output", "layer_stack_cuda.pt"))
 
     assert len(layer_stack_cuda.keys()) == len(layer_stack_cpu.keys())

From e9b302582f195805871f0e3e2a915bfeac2c20ea Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 17 Jul 2025 15:30:10 -0300
Subject: [PATCH 07/20] save all logs

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 16 ++++++----------
 1 file changed, 6 insertions(+), 10 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 60f5e60a..7434b882 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -1,4 +1,5 @@
 import os
+import sys
 import time
 import logging
 import argparse
@@ -19,10 +20,6 @@
 from aiu_fms_testing_utils.utils.metrics_utils import tensor_abs_diff, tensor_cos_sim
 
-logger = logging.getLogger(__name__)
-LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO').upper()
-logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(message)s")
-
 parser = argparse.ArgumentParser(
     description="Script to generate the model's metrics by layer"
 )
@@ -95,6 +92,11 @@
 output_path = args.output_path
 sharegpt_path = args.sharegpt_path
 
+logger = logging.getLogger(__name__).addHandler(logging.StreamHandler(sys.stdout))
+LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO').upper()
+logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(message)s",
+                    filename=os.path.join(output_path, "layers-output", f"layers_input.log"))
+
 common_model_paths = args.model_path if args.model_path else args.variant
 if isinstance(common_model_paths, str):
     common_model_paths = [str(bs) for bs in common_model_paths.split(",")]
@@ -426,9 +428,7 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                 tensor_cuda_out = cuda_output[-1]
                 tensor_cpu_out = cpu_output[-1]
                 for i in range(len(cpu_output)):
-                    logger.debug(f"inputs: {cuda_output[i].shape} {cpu_output[i].to('cuda').shape}")
                     cos_sim.append(tensor_cos_sim(cuda_output[i], cpu_output[i].to('cuda')))
-                    logger.debug(f"cos_sim output:{tensor_cos_sim(cuda_output[i], cpu_output[i].to('cuda')).shape}")
                     abs_diff.append(tensor_abs_diff(cuda_output[i], cpu_output[i].to('cuda')))
             else:
                 head_tensor_cpu = cpu_output[-1]
@@ -438,16 +438,12 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                     for j in range(len(head_tensor_gpu[i])):
                         tensor_cuda_out = head_tensor_gpu[i][j]
                         tensor_cpu_out = head_tensor_cpu[i][j]
-                        logger.debug(f"inputs: {head_tensor_gpu[i][j].shape} {head_tensor_cpu[i][j].to('cuda').shape}")
                         cos_sim.append(tensor_cos_sim(head_tensor_cpu[i][j].to('cuda'), head_tensor_gpu[i][j]))
-                        logger.debug(f"cos_sim output:{tensor_cos_sim(head_tensor_cpu[i][j].to('cuda'), head_tensor_gpu[i][j]).shape}")
                         abs_diff.append(tensor_abs_diff(head_tensor_cpu[i][j].to('cuda'), head_tensor_gpu[i][j]))
                     else:
                         tensor_cuda_out = head_tensor_gpu[i]
                         tensor_cpu_out = head_tensor_cpu[i]
-                        logger.debug(f"inputs: {head_tensor_gpu[i].shape} {head_tensor_cpu[i].to('cuda').shape}")
                         cos_sim.append(tensor_cos_sim(head_tensor_cpu[i].to('cuda'), head_tensor_gpu[i]))
-                        logger.debug(f"cos_sim output:{tensor_cos_sim(head_tensor_cpu[i].to('cuda'), head_tensor_gpu[i]).shape}")
                         abs_diff.append(tensor_abs_diff(head_tensor_cpu[i].to('cuda'), head_tensor_gpu[i]))
             else:
                 tensor_cpu_out = cpu_output.to('cuda')

From b742615aec27c7e1a68507a8fe1b74b5619d22aa Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 17 Jul 2025 15:56:52 -0300
Subject: [PATCH 08/20] save all logs

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 26 ++++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 7434b882..932b003c 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -92,10 +92,24 @@
 output_path = args.output_path
 sharegpt_path = args.sharegpt_path
 
-logger = logging.getLogger(__name__).addHandler(logging.StreamHandler(sys.stdout))
-LOG_LEVEL = os.environ.get('LOG_LEVEL', 'INFO').upper()
-logging.basicConfig(level=LOG_LEVEL, format="%(asctime)s %(message)s",
-                    filename=os.path.join(output_path, "layers-output", f"layers_input.log"))
+if not os.path.exists(output_path):
+    os.makedirs(output_path)
+
+if not os.path.exists(os.path.join(output_path,"layers-input-output")):
+    os.makedirs(os.path.join(output_path,"layers-input-output"))
+
+logging.basicConfig(level=logging.DEBUG,
+                    format='%(asctime)s %(name)-12s %(message)s',
+                    datefmt='%m-%d %H:%M',
+                    filename=os.path.join(output_path, "layers-input-output", f"layers_input.log"),
+                    filemode='w')
+console = logging.StreamHandler()
+console.setLevel(os.getenv("LOG_LEVEL", "INFO"))
+formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
+console.setFormatter(formatter)
+logging.getLogger('').addHandler(console)
+
+logger = logging.getLogger('generate_layers_metrics')
 
 common_model_paths = args.model_path if args.model_path else args.variant
 if isinstance(common_model_paths, str):
@@ -391,7 +405,7 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                                              seq_length=seq_length, max_new_tokens=max_new_tokens,
                                              tokenizer=tokenizer)
 
-    torch.save(layer_stack_cpu, os.path.join(output_path, "layers-output", "layer_stack_cpu.pt"))
+    torch.save(layer_stack_cpu, os.path.join(output_path, "layers-input-output", "layer_stack_cpu.pt"))
 
     global generate_iters
     generate_iters = 0
@@ -403,7 +417,7 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
                                               tokenizer=tokenizer)
 
-    torch.save(layer_stack_cuda, os.path.join(output_path, "layers-output", "layer_stack_cuda.pt"))
+    torch.save(layer_stack_cuda, os.path.join(output_path, "layers-input-output", "layer_stack_cuda.pt"))
 
     assert len(layer_stack_cuda.keys()) == len(layer_stack_cpu.keys())

From f6eaa78c4af0b4eb22d864d1ef7b65dc7e0d0751 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 17 Jul 2025 16:55:05 -0300
Subject: [PATCH 09/20] Changes model prefix

Signed-off-by: Flavia Beo
---
 aiu_fms_testing_utils/utils/metrics_utils.py | 35 +++++++++++++
 scripts/generate_layers_metrics.py           | 54 ++++++++++++++------
 2 files changed, 73 insertions(+), 16 deletions(-)

diff --git a/aiu_fms_testing_utils/utils/metrics_utils.py b/aiu_fms_testing_utils/utils/metrics_utils.py
index 9f011786..ac117717 100644
--- a/aiu_fms_testing_utils/utils/metrics_utils.py
+++ b/aiu_fms_testing_utils/utils/metrics_utils.py
@@ -3,6 +3,41 @@
 import torch.nn as nn
 
 
+def get_model_prefix(model_path,
+                     shapes_size,
+                     max_new_tokens: None,
+                     batch_size: None,
+                     seq_length: None,
+                     dtype: None,
+                     include_shapes: False):
+    """
+    Generate a prefix for a model based on its path and other parameters.
+
+    Args:
+        model_path (str): The path to the model file.
+        shapes_size (int): The size of the shapes array to use in the model.
+        max_new_tokens (int): The maximum number of new tokens to use in the model.
+        batch_size (int): The batch size to use in the model.
+        seq_length (int): The sequence length to use in the model.
+        dtype (str): The data type to use in the model.
+        include_shapes (bool): Whether to include the shapes in the prefix.
+    Returns:
+        str: A prefix for the model based on its path and other parameters.
+    """
+    if model_path.count("/") > 1:
+        # this means that the model_path does NOT match to the hf pattern
+        # Eg.: /home/another-dir/another/ibm-granite/granite-3.3-8b-base
+        model_prefix = model_path.split("/")[-2] + "--" + model_path.split("/")[-1]
+    else:
+        # this means that the model_path does match to the hf pattern
+        # Eg.: ibm-granite/granite-3.3-8b-base
+        model_prefix = model_path.replace("/", "--")
+
+    if shapes_size > 1 or include_shapes:
+        model_prefix = f"{model_prefix}_max-new-tokens-{max_new_tokens}_batch-size-{batch_size}_seq-length-{seq_length}_dtype-{dtype}"
+
+    return model_prefix
+
 def abs_diff_linalg_norm(res_vector):
     """
     Calculates the Euclidean norm (also known as the L2 norm) of a given array res_vector. This is equivalent to finding the square
diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 932b003c..7441011e 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -14,10 +14,8 @@
 
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
-from aiu_fms_testing_utils.testing.validation import get_default_validation_prefix
-
 from aiu_fms_testing_utils.utils import prepare_inputs
-from aiu_fms_testing_utils.utils.metrics_utils import tensor_abs_diff, tensor_cos_sim
+from aiu_fms_testing_utils.utils.metrics_utils import tensor_abs_diff, tensor_cos_sim, get_model_prefix
 
 
 parser = argparse.ArgumentParser(
@@ -92,16 +90,13 @@
 output_path = args.output_path
 sharegpt_path = args.sharegpt_path
 
-if not os.path.exists(output_path):
-    os.makedirs(output_path)
-
-if not os.path.exists(os.path.join(output_path,"layers-input-output")):
-    os.makedirs(os.path.join(output_path,"layers-input-output"))
+if not os.path.exists(os.path.join(output_path,"layers-input-output-logs")):
+    os.makedirs(os.path.join(output_path,"layers-input-output-logs"))
 
 logging.basicConfig(level=logging.DEBUG,
                     format='%(asctime)s %(name)-12s %(message)s',
                     datefmt='%m-%d %H:%M',
-                    filename=os.path.join(output_path, "layers-input-output", f"layers_input.log"),
+                    filename=os.path.join(output_path, "layers-input-output-logs", f"layers_input.log"),
                     filemode='w')
 console = logging.StreamHandler()
 console.setLevel(os.getenv("LOG_LEVEL", "INFO"))
@@ -339,7 +334,7 @@ def write_csv(values, path, metric, gpu_layer_shape, cpu_layer_shape, output_sha
         f.write(f"{values}\n")
     f.close()
 
-def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens):
+def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens, model_thresholds_folder):
     """
     Generate metrics for layers in a given model.
@@ -348,6 +343,7 @@
         batch_size (int): The batch size used for inference.
         seq_length (int): The sequence length used for inference.
         max_new_tokens (int): The maximum number of new tokens allowed for generation.
+        model_thresholds_folder (path): The path where the files will be saved.
 
     Returns:
         None
@@ -359,6 +355,8 @@
     if "HF_HOME" not in os.environ:
         os.environ["HF_HOME"] = "/tmp/models/hf_cache"
 
+    model_prefix = get_model_prefix(model_path=model_path, shapes_size=0, include_shapes=False)
+
     model_path_kwargs = {"variant": model_path} if args.variant else {"model_path": model_path}
     micro_model_kwargs = {"architecture": args.architecture}
 
@@ -405,7 +403,7 @@
                                              seq_length=seq_length, max_new_tokens=max_new_tokens,
                                              tokenizer=tokenizer)
 
-    torch.save(layer_stack_cpu, os.path.join(output_path, "layers-input-output", "layer_stack_cpu.pt"))
+    torch.save(layer_stack_cpu, os.path.join(output_path, "layers-input-output-logs", f"{model_prefix}-{mode}-layer_stack_cpu.pt"))
 
     global generate_iters
     generate_iters = 0
@@ -417,7 +415,7 @@
                                               tokenizer=tokenizer)
 
-    torch.save(layer_stack_cuda, os.path.join(output_path, "layers-input-output", "layer_stack_cuda.pt"))
+    torch.save(layer_stack_cuda, os.path.join(output_path, "layers-input-output-logs", f"{model_prefix}-{mode}-layer_stack_cuda.pt"))
 
     assert len(layer_stack_cuda.keys()) == len(layer_stack_cpu.keys())
@@ -465,11 +463,19 @@
             abs_diff = tensor_abs_diff(tensor_cpu_out, cuda_output)
             cos_sim = tensor_cos_sim(tensor_cpu_out, cuda_output)
 
-        prefix = get_default_validation_prefix(model_path, max_new_token, batch_size, seq_length, 'float16')
         layer_name = str(layer_key).replace('[','').replace(']', '')
 
-        abs_diff_path = os.path.join(output_path, f"{prefix}--{layer_name}.abs_diff.csv")
-        cos_sim_path = os.path.join(output_path, f"{prefix}--{layer_name}.cos_sim.csv")
+        prefix = get_model_prefix(model_path=model_path,
+                                  shapes_size=len(common_shapes),
+                                  max_new_tokens=max_new_token,
+                                  batch_size=batch_size,
+                                  seq_length=seq_length,
+                                  dtype='float16',
+                                  include_shapes=True
+                                  )
+
+        abs_diff_path = os.path.join(model_thresholds_folder, f"{prefix}--{layer_name}.abs_diff.csv")
+        cos_sim_path = os.path.join(model_thresholds_folder, f"{prefix}--{layer_name}.cos_sim.csv")
 
         cos_sim_res, cos_shape = get_metric_values(cos_sim)
         abs_diff_res, abs_diff_shape = get_metric_values(abs_diff)
@@ -484,5 +490,21 @@
     logger.info(f"Completed {model_path} layers' metrics generation with {mode} mode")
 
 for model_id, batch_size, sequence_length, max_new_token in common_shapes:
+
+    model_prefix = get_model_prefix(model_path=model_id,
+                                    shapes_size=len(common_shapes),
+                                    include_shapes=False
+                                    )
+
+    model_thresholds_folder = os.path.join(output_path, model_prefix)
+
+    if not os.path.exists(model_thresholds_folder):
+        os.makedirs(model_thresholds_folder)
+
     logger.info(f"testing model_id-{model_id}, max_new_tokens-{max_new_token}, batch_size-{batch_size}, seq_length-{sequence_length}")
-    generate_layers_metrics(model_path=model_id, batch_size=batch_size, seq_length=sequence_length, max_new_tokens=max_new_token)
+    generate_layers_metrics(model_path=model_id,
+                            batch_size=batch_size,
+                            seq_length=sequence_length,
+                            max_new_tokens=max_new_token,
+                            model_thresholds_folder=model_thresholds_folder
+                            )

From 511831c36f5a22a328afb1bed27bc14c614c9d79 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 17 Jul 2025 17:03:40 -0300
Subject: [PATCH 10/20] Fix method call

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 7441011e..fa1c15ad 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -355,7 +355,13 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens,
     if "HF_HOME" not in os.environ:
         os.environ["HF_HOME"] = "/tmp/models/hf_cache"
 
-    model_prefix = get_model_prefix(model_path=model_path, shapes_size=0, include_shapes=False)
+    model_prefix = get_model_prefix(model_path=model_path,
+                                    shapes_size=0,
+                                    max_new_tokens=max_new_tokens,
+                                    batch_size=batch_size,
+                                    seq_length=seq_length,
+                                    dtype="",
+                                    include_shapes=False)
 
     model_path_kwargs = {"variant": model_path} if args.variant else {"model_path": model_path}
     micro_model_kwargs = {"architecture": args.architecture}
@@ -491,8 +497,12 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens,
 
 for model_id, batch_size, sequence_length, max_new_token in common_shapes:
 
-    model_prefix = get_model_prefix(model_path=model_id,
+    model_prefix = get_model_prefix(model_id,
                                     shapes_size=len(common_shapes),
+                                    max_new_tokens=max_new_token,
+                                    batch_size=batch_size,
+                                    seq_length=sequence_length,
+                                    dtype="",
                                     include_shapes=False
                                     )

From 57c60f10acd1d91c108c4d3d320d4881a24c2915 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Tue, 22 Jul 2025 10:12:47 -0300
Subject: [PATCH 11/20] Fix model th path

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index fa1c15ad..b89d7455 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -508,8 +508,10 @@ def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens,
 
     model_thresholds_folder = os.path.join(output_path, model_prefix)
 
-    if not os.path.exists(model_thresholds_folder):
+    if not os.path.exists(output_path):
         os.makedirs(model_thresholds_folder)
+    else:
+        model_thresholds_folder = output_path
 
     logger.info(f"testing model_id-{model_id}, max_new_tokens-{max_new_token}, batch_size-{batch_size}, seq_length-{sequence_length}")

From 44323ea1255fd09c01091cc074a5e90f35a51c7f Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Tue, 22 Jul 2025 10:19:00 -0300
Subject: [PATCH 12/20] Fix linting

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 21545400..641166b1 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -1,5 +1,4 @@
 import os
-import sys
 import time
 import logging
 import argparse
@@ -92,7 +91,7 @@
 logging.basicConfig(level=logging.DEBUG,
                     format='%(asctime)s %(name)-12s %(message)s',
                     datefmt='%m-%d %H:%M',
-                    filename=os.path.join(output_path, "layers-input-output-logs", f"layers_input.log"),
+                    filename=os.path.join(output_path, "layers-input-output-logs", "layers_input.log"),
                     filemode='w')
 console = logging.StreamHandler()
 console.setLevel(os.getenv("LOG_LEVEL", "INFO"))

From e87cf1d5ac7b3f24283ec7a7a46d1340c3caf20e Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Tue, 22 Jul 2025 10:21:54 -0300
Subject: [PATCH 13/20] Format with ruff

Signed-off-by: Flavia Beo
---
 aiu_fms_testing_utils/utils/metrics_utils.py |  23 +-
 scripts/generate_layers_metrics.py           | 231 ++++++++++++-------
 2 files changed, 160 insertions(+), 94 deletions(-)

diff --git a/aiu_fms_testing_utils/utils/metrics_utils.py b/aiu_fms_testing_utils/utils/metrics_utils.py
index 5c58f953..aca6ba6c 100644
--- a/aiu_fms_testing_utils/utils/metrics_utils.py
+++ b/aiu_fms_testing_utils/utils/metrics_utils.py
@@ -3,13 +3,15 @@
 import torch.nn as nn
 
 
-def get_model_prefix(model_path,
-                     shapes_size,
-                     max_new_tokens: None,
-                     batch_size: None,
-                     seq_length: None,
-                     dtype: None,
-                     include_shapes: False):
+def get_model_prefix(
+    model_path,
+    shapes_size,
+    max_new_tokens: None,
+    batch_size: None,
+    seq_length: None,
+    dtype: None,
+    include_shapes: False,
+):
     """
     Generate a prefix for a model based on its path and other parameters.
 
@@ -25,19 +27,20 @@ def get_model_prefix(model_path,
         str: A prefix for the model based on its path and other parameters.
     """
     if model_path.count("/") > 1:
-        # this means that the model_path does NOT match to the hf pattern 
+        # this means that the model_path does NOT match to the hf pattern
         # Eg.: /home/another-dir/another/ibm-granite/granite-3.3-8b-base
         model_prefix = model_path.split("/")[-2] + "--" + model_path.split("/")[-1]
     else:
-        # this means that the model_path does match to the hf pattern 
+        # this means that the model_path does match to the hf pattern
         # Eg.: ibm-granite/granite-3.3-8b-base
         model_prefix = model_path.replace("/", "--")
 
     if shapes_size > 1 or include_shapes:
         model_prefix = f"{model_prefix}_max-new-tokens-{max_new_tokens}_batch-size-{batch_size}_seq-length-{seq_length}_dtype-{dtype}"
-    
+
     return model_prefix
 
+
 def abs_diff_linalg_norm(res_vector):
     """
     Calculates the Euclidean norm (also known as the L2 norm) of a given array res_vector. This is equivalent to finding the square
diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 641166b1..4c184e04 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -13,7 +13,11 @@
 from transformers import AutoModelForCausalLM, AutoTokenizer
 
 from aiu_fms_testing_utils.utils import prepare_inputs
-from aiu_fms_testing_utils.utils.metrics_utils import tensor_abs_diff, tensor_cos_sim, get_model_prefix
+from aiu_fms_testing_utils.utils.metrics_utils import (
+    tensor_abs_diff,
+    tensor_cos_sim,
+    get_model_prefix,
+)
 
 
 parser = argparse.ArgumentParser(
@@ -47,7 +51,7 @@
     choices=["fms", "hf"],
     default="fms",
     required=True,
-    help="Which model loader/runner to be used; fms - IBM's Foundation Model Stack or hf - HuggingFace Transformers."
+    help="Which model loader/runner to be used; fms - IBM's Foundation Model Stack or hf - HuggingFace Transformers.",
 )
 parser.add_argument(
     "--batch_sizes",
@@ -85,21 +89,23 @@
 output_path = args.output_path
 sharegpt_path = args.sharegpt_path
 
-if not os.path.exists(os.path.join(output_path,"layers-input-output-logs")):
-    os.makedirs(os.path.join(output_path,"layers-input-output-logs"))
+if not os.path.exists(os.path.join(output_path, "layers-input-output-logs")):
+    os.makedirs(os.path.join(output_path, "layers-input-output-logs"))
 
-logging.basicConfig(level=logging.DEBUG,
-                    format='%(asctime)s %(name)-12s %(message)s',
-                    datefmt='%m-%d %H:%M',
-                    filename=os.path.join(output_path, "layers-input-output-logs", "layers_input.log"),
-                    filemode='w')
+logging.basicConfig(
+    level=logging.DEBUG,
+    format="%(asctime)s %(name)-12s %(message)s",
+    datefmt="%m-%d %H:%M",
+    filename=os.path.join(output_path, "layers-input-output-logs", "layers_input.log"),
+    filemode="w",
+)
 console = logging.StreamHandler()
 console.setLevel(os.getenv("LOG_LEVEL", "INFO"))
-formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
+formatter = logging.Formatter("%(name)-12s: %(levelname)-8s %(message)s")
 console.setFormatter(formatter)
-logging.getLogger('').addHandler(console)
+logging.getLogger("").addHandler(console)
 
-logger = logging.getLogger('generate_layers_metrics')
+logger = logging.getLogger("generate_layers_metrics")
 
 common_model_paths = args.model_path if args.model_path else args.variant
 if isinstance(common_model_paths, str):
@@ -157,7 +163,7 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
 
     if "cuda" in device:
         ids = ids.to("cuda")
-    
+
     if args.model_loader == "hf":
         max_seq_len = max_len
     elif hasattr(model.config, "ntk_scaling") and model.config.ntk_scaling:
@@ -183,11 +189,13 @@ def __infer_layer(model, max_len, device, max_new_tokens, batch_size, tokenizer)
                 )
                 result, timings = result
             if args.model_loader == "hf":
-                result = model.generate(ids,
-                                        max_length=max_seq_len,
-                                        max_new_tokens=max_new_token,
-                                        do_sample=do_sample,
-                                        use_cache=use_cache)
+                result = model.generate(
+                    ids,
+                    max_length=max_seq_len,
+                    max_new_tokens=max_new_token,
+                    do_sample=do_sample,
+                    use_cache=use_cache,
+                )
             logger.info(f"Generation completed: Result len is {len(result)}")
             if len(result.shape) == 1:
                 result = result.unsqueeze(0)
@@ -345,7 +353,10 @@ def write_csv(values, path, metric, gpu_layer_shape, cpu_layer_shape, output_sha
         f.write(f"{values}\n")
     f.close()
 
-def generate_layers_metrics(model_path, batch_size, seq_length, max_new_tokens, model_thresholds_folder):
+
+def generate_layers_metrics(
+    model_path, batch_size, seq_length, max_new_tokens, model_thresholds_folder
+):
     """
     Generate metrics for layers in a given model.
@@ -366,15 +377,19 @@ def generate_layers_metrics(
     if "HF_HOME" not in os.environ:
         os.environ["HF_HOME"] = "/tmp/models/hf_cache"
 
-    model_prefix = get_model_prefix(model_path=model_path,
-                                    shapes_size=0,
-                                    max_new_tokens=max_new_tokens,
-                                    batch_size=batch_size,
-                                    seq_length=seq_length,
-                                    dtype="",
-                                    include_shapes=False)
+    model_prefix = get_model_prefix(
+        model_path=model_path,
+        shapes_size=0,
+        max_new_tokens=max_new_tokens,
+        batch_size=batch_size,
+        seq_length=seq_length,
+        dtype="",
+        include_shapes=False,
+    )
 
-    model_path_kwargs = {"variant": model_path} if args.variant else {"model_path": model_path}
+    model_path_kwargs = (
+        {"variant": model_path} if args.variant else {"model_path": model_path}
+    )
     micro_model_kwargs = {"architecture": args.architecture}
 
     get_model_kwargs = {
@@ -386,15 +401,13 @@ def generate_layers_metrics(
         tokenizer = AutoTokenizer.from_pretrained(model_path)
 
         # prepare the cpu model
-        validation_model = AutoModelForCausalLM.from_pretrained(model_path,
-                                                                device_map="cpu",
-                                                                torch_dtype=torch.float32
-                                                                )
-        # prepare the cuda model
-        validation_model_cuda = AutoModelForCausalLM.from_pretrained(model_path,
-                                                                     device_map="cuda",
-                                                                     torch_dtype=torch.float16
-                                                                     )
+        validation_model = AutoModelForCausalLM.from_pretrained(
+            model_path, device_map="cpu", torch_dtype=torch.float32
+        )
+        # prepare the cuda model
+        validation_model_cuda = AutoModelForCausalLM.from_pretrained(
+            model_path, device_map="cuda", torch_dtype=torch.float16
+        )
     if args.model_loader == "fms":
         tokenizer = tokenizers.get_tokenizer(model_path)
 
@@ -414,25 +427,45 @@ def generate_layers_metrics(
             **get_model_kwargs,
         )
 
-    layer_stack_cpu = __register_call_layers(model=validation_model,
-                                             batch_size=batch_size,
-                                             device="cpu",
-                                             seq_length=seq_length, max_new_tokens=max_new_tokens,
-                                             tokenizer=tokenizer)
-
-    torch.save(layer_stack_cpu, os.path.join(output_path, "layers-input-output-logs", f"{model_prefix}-{mode}-layer_stack_cpu.pt"))
-
+    layer_stack_cpu = __register_call_layers(
+        model=validation_model,
+        batch_size=batch_size,
+        device="cpu",
+        seq_length=seq_length,
+        max_new_tokens=max_new_tokens,
+        tokenizer=tokenizer,
+    )
+
+    torch.save(
+        layer_stack_cpu,
+        os.path.join(
+            output_path,
+            "layers-input-output-logs",
+            f"{model_prefix}-{mode}-layer_stack_cpu.pt",
+        ),
+    )
+
     global generate_iters
     generate_iters = 0
     logger.info("Finished registering CPU layers")
 
-    layer_stack_cuda = __register_call_layers(model=validation_model_cuda,
-                                              batch_size=batch_size,
-                                              device="cuda",
-                                              seq_length=seq_length, max_new_tokens=max_new_tokens,
-                                              tokenizer=tokenizer)
-
-    torch.save(layer_stack_cuda, os.path.join(output_path, "layers-input-output-logs", f"{model_prefix}-{mode}-layer_stack_cuda.pt"))
+    layer_stack_cuda = __register_call_layers(
+        model=validation_model_cuda,
+        batch_size=batch_size,
+        device="cuda",
+        seq_length=seq_length,
+        max_new_tokens=max_new_tokens,
+        tokenizer=tokenizer,
+    )
+
+    torch.save(
+        layer_stack_cuda,
+        os.path.join(
+            output_path,
+            "layers-input-output-logs",
+            f"{model_prefix}-{mode}-layer_stack_cuda.pt",
+        ),
+    )
 
     assert len(layer_stack_cuda.keys()) == len(layer_stack_cpu.keys())
@@ -456,8 +489,12 @@ def generate_layers_metrics(
                 tensor_cuda_out = cuda_output[-1]
                 tensor_cpu_out = cpu_output[-1]
                 for i in range(len(cpu_output)):
-                    cos_sim.append(tensor_cos_sim(cuda_output[i], cpu_output[i].to('cuda')))
-                    abs_diff.append(tensor_abs_diff(cuda_output[i], cpu_output[i].to('cuda')))
+                    cos_sim.append(
+                        tensor_cos_sim(cuda_output[i], cpu_output[i].to("cuda"))
+                    )
+                    abs_diff.append(
+                        tensor_abs_diff(cuda_output[i], cpu_output[i].to("cuda"))
+                    )
             else:
                 head_tensor_cpu = cpu_output[-1]
                 head_tensor_gpu = cuda_output[-1]
@@ -466,32 +503,55 @@ def generate_layers_metrics(
                     for j in range(len(head_tensor_gpu[i])):
                         tensor_cuda_out = head_tensor_gpu[i][j]
                         tensor_cpu_out = head_tensor_cpu[i][j]
-                        cos_sim.append(tensor_cos_sim(head_tensor_cpu[i][j].to('cuda'), head_tensor_gpu[i][j]))
-                        abs_diff.append(tensor_abs_diff(head_tensor_cpu[i][j].to('cuda'), head_tensor_gpu[i][j]))
+                        cos_sim.append(
+                            tensor_cos_sim(
+                                head_tensor_cpu[i][j].to("cuda"),
+                                head_tensor_gpu[i][j],
+                            )
+                        )
+                        abs_diff.append(
+                            tensor_abs_diff(
+                                head_tensor_cpu[i][j].to("cuda"),
+                                head_tensor_gpu[i][j],
+                            )
+                        )
                     else:
                         tensor_cuda_out = head_tensor_gpu[i]
                         tensor_cpu_out = head_tensor_cpu[i]
-                        cos_sim.append(tensor_cos_sim(head_tensor_cpu[i].to('cuda'), head_tensor_gpu[i]))
-                        abs_diff.append(tensor_abs_diff(head_tensor_cpu[i].to('cuda'), head_tensor_gpu[i]))
+                        cos_sim.append(
+                            tensor_cos_sim(
+                                head_tensor_cpu[i].to("cuda"), head_tensor_gpu[i]
+                            )
+                        )
+                        abs_diff.append(
+                            tensor_abs_diff(
+                                head_tensor_cpu[i].to("cuda"), head_tensor_gpu[i]
+                            )
+                        )
             else:
                 tensor_cpu_out = cpu_output.to("cuda")
                 tensor_cuda_out = cuda_output
                 abs_diff = tensor_abs_diff(tensor_cpu_out, cuda_output)
                 cos_sim = tensor_cos_sim(tensor_cpu_out, cuda_output)
 
-        layer_name = str(layer_key).replace('[','').replace(']', '')
+        layer_name = str(layer_key).replace("[", "").replace("]", "")
 
-        prefix = get_model_prefix(model_path=model_path,
-                                  shapes_size=len(common_shapes),
-                                  max_new_tokens=max_new_token,
-                                  batch_size=batch_size,
-                                  seq_length=seq_length,
-                                  dtype='float16',
-                                  include_shapes=True
-                                  )
+        prefix = get_model_prefix(
+            model_path=model_path,
+            shapes_size=len(common_shapes),
+            max_new_tokens=max_new_token,
+            batch_size=batch_size,
+            seq_length=seq_length,
+            dtype="float16",
+            include_shapes=True,
+        )
 
-        abs_diff_path = os.path.join(model_thresholds_folder, f"{prefix}--{layer_name}.abs_diff.csv")
-        cos_sim_path = os.path.join(model_thresholds_folder, f"{prefix}--{layer_name}.cos_sim.csv")
+        abs_diff_path = os.path.join(
+            model_thresholds_folder, f"{prefix}--{layer_name}.abs_diff.csv"
+        )
+        cos_sim_path = os.path.join(
+            model_thresholds_folder, f"{prefix}--{layer_name}.cos_sim.csv"
+        )
 
         cos_sim_res, cos_shape = get_metric_values(cos_sim)
         abs_diff_res, abs_diff_shape = get_metric_values(abs_diff)
@@ -521,15 +581,15 @@
 
 for model_id, batch_size, sequence_length, max_new_token in common_shapes:
-
-    model_prefix = get_model_prefix(model_id,
-                                    shapes_size=len(common_shapes),
-                                    max_new_tokens=max_new_token,
-                                    batch_size=batch_size,
-                                    seq_length=sequence_length,
-                                    dtype="",
-                                    include_shapes=False
-                                    )
+    model_prefix = get_model_prefix(
+        model_id,
+        shapes_size=len(common_shapes),
+        max_new_tokens=max_new_token,
+        batch_size=batch_size,
+        seq_length=sequence_length,
+        dtype="",
+        include_shapes=False,
+    )
 
     model_thresholds_folder = os.path.join(output_path, model_prefix)
 
@@ -538,10 +598,13 @@
     else:
         model_thresholds_folder = output_path
 
-    logger.info(f"testing model_id-{model_id}, max_new_tokens-{max_new_token}, batch_size-{batch_size}, seq_length-{sequence_length}")
-    generate_layers_metrics(model_path=model_id,
-                            batch_size=batch_size,
-                            seq_length=sequence_length,
-                            max_new_tokens=max_new_token,
-                            model_thresholds_folder=model_thresholds_folder
-                            )
+    logger.info(
+        f"testing model_id-{model_id}, max_new_tokens-{max_new_token}, batch_size-{batch_size}, seq_length-{sequence_length}"
+    )
+    generate_layers_metrics(
+        model_path=model_id,
+        batch_size=batch_size,
+        seq_length=sequence_length,
+        max_new_tokens=max_new_token,
+        model_thresholds_folder=model_thresholds_folder,
+    )

From 44e97bf4470097322bb9c19d412fcb81911efe02 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 24 Jul 2025 14:19:48 -0300
Subject: [PATCH 14/20] Fix output path

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 4c184e04..570616fb 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -75,7 +75,7 @@
     help="Max number of generated tokens separated by comma. Eg.: 64,128",
 )
 parser.add_argument(
-    "--output_path", type=str, default="/tmp/output", help="Path to save output files"
+    "--output_path", type=str, default=None, help="Path to save output files"
 )
 parser.add_argument(
     "--sharegpt_path",
@@ -88,6 +88,7 @@
 mode = args.mode
 output_path = args.output_path
 sharegpt_path = args.sharegpt_path
+default_path = "/home/senuser/models/deepview/layerwise-thresholds"
 
 if not os.path.exists(os.path.join(output_path, "layers-input-output-logs")):
     os.makedirs(os.path.join(output_path, "layers-input-output-logs"))
@@ -591,9 +592,13 @@ def generate_layers_metrics(
         include_shapes=False,
     )
 
-    model_thresholds_folder = os.path.join(output_path, model_prefix)
+    model_root_folder = os.path.join(default_path, model_prefix)
+    model_thresholds_folder = os.path.join(default_path, model_prefix, mode)
 
-    if not os.path.exists(output_path):
+    if not output_path or not os.path.exists(output_path):
+        os.makedirs(model_thresholds_folder)
+    elif not os.path.exists(model_root_folder):
+        os.makedirs(model_root_folder)
         os.makedirs(model_thresholds_folder)
     else:
         model_thresholds_folder = output_path

From 4ec56345066471a608f2dba1bcca75e318846a7d Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 24 Jul 2025 14:37:02 -0300
Subject: [PATCH 15/20] Fix logging output path

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 570616fb..906496e5 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -75,7 +75,7 @@
     help="Max number of generated tokens separated by comma. Eg.: 64,128",
 )
 parser.add_argument(
-    "--output_path", type=str, default=None, help="Path to save output files"
+    "--output_path", type=str, default="/tmp/output", help="Path to save output files"
 )
 parser.add_argument(
     "--sharegpt_path",
@@ -603,6 +603,8 @@ def generate_layers_metrics(
     else:
         model_thresholds_folder = output_path
 
+    logging.basicConfig(filename=model_thresholds_folder)
+
     logger.info(
         f"testing model_id-{model_id}, max_new_tokens-{max_new_token}, batch_size-{batch_size}, seq_length-{sequence_length}"
     )

From 562d4fb2a5732c78106ac3fc0328381e52ba9f68 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 24 Jul 2025 14:45:10 -0300
Subject: [PATCH 16/20] Adds default th path as env var

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 906496e5..1d561af9 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -75,7 +75,7 @@
     help="Max number of generated tokens separated by comma. Eg.: 64,128",
 )
 parser.add_argument(
-    "--output_path", type=str, default="/tmp/output", help="Path to save output files"
+    "--output_path", type=str, default=None, help="Path to save output files"
 )
 parser.add_argument(
     "--sharegpt_path",
@@ -88,16 +88,13 @@
 mode = args.mode
 output_path = args.output_path
 sharegpt_path = args.sharegpt_path
-default_path = "/home/senuser/models/deepview/layerwise-thresholds"
-
-if not os.path.exists(os.path.join(output_path, "layers-input-output-logs")):
-    os.makedirs(os.path.join(output_path, "layers-input-output-logs"))
+default_path = os.getenv("DEEPVIEW_THRESHOLDS_FOLDERPATH", "/home/senuser/models/deepview/layerwise-thresholds")
 
 logging.basicConfig(
     level=logging.DEBUG,
     format="%(asctime)s %(name)-12s %(message)s",
     datefmt="%m-%d %H:%M",
-    filename=os.path.join(output_path, "layers-input-output-logs", "layers_input.log"),
+    filename=os.path.join("/tmp", "layers-input-output-logs", "layers_input.log"),
     filemode="w",
 )
 console = logging.StreamHandler()
@@ -605,6 +602,10 @@ def generate_layers_metrics(
 
     logging.basicConfig(filename=model_thresholds_folder)
 
+    logger.info(
+        f"model output path is {model_thresholds_folder}"
+    )
+
     logger.info(
         f"testing model_id-{model_id}, max_new_tokens-{max_new_token}, batch_size-{batch_size}, seq_length-{sequence_length}"
     )

From 06ae7920cc51a3c3941c75ff63c773f36ceb1a81 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 24 Jul 2025 14:49:09 -0300
Subject: [PATCH 17/20] Adds layer io dir

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 1d561af9..98a0b0e8 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -94,7 +94,7 @@
     level=logging.DEBUG,
     format="%(asctime)s %(name)-12s %(message)s",
     datefmt="%m-%d %H:%M",
-    filename=os.path.join("/tmp", "layers-input-output-logs", "layers_input.log"),
+    filename=os.path.join("/tmp", "layers_input.log"),
    filemode="w",
 )
 console = logging.StreamHandler()
@@ -437,7 +437,7 @@ def generate_layers_metrics(
     torch.save(
         layer_stack_cpu,
         os.path.join(
-            output_path,
+            model_thresholds_folder,
             "layers-input-output-logs",
             f"{model_prefix}-{mode}-layer_stack_cpu.pt",
         ),
     )
@@ -459,7 +459,7 @@ def generate_layers_metrics(
     torch.save(
         layer_stack_cuda,
         os.path.join(
-            output_path,
+            model_thresholds_folder,
             "layers-input-output-logs",
             f"{model_prefix}-{mode}-layer_stack_cuda.pt",
         ),
     )
@@ -591,6 +591,7 @@ def generate_layers_metrics(
 
     model_root_folder = os.path.join(default_path, model_prefix)
     model_thresholds_folder = os.path.join(default_path, model_prefix, mode)
+    layer_io = os.join(model_thresholds_folder, "layers-input-output-logs")
 
     if not output_path or not os.path.exists(output_path):
         os.makedirs(model_thresholds_folder)
@@ -600,7 +601,10 @@ def generate_layers_metrics(
     else:
         model_thresholds_folder = output_path
 
-    logging.basicConfig(filename=model_thresholds_folder)
+    if not os.path.exists(layer_io):
+        os.makedirs(layer_io)
+
+    logging.basicConfig(filename=layer_io)
 
     logger.info(
         f"model output path is {model_thresholds_folder}"
     )

From f92a4aac9e3cd214a22ce6d295dcb436fba8bb43 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 24 Jul 2025 14:49:54 -0300
Subject: [PATCH 18/20] Fix typo

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 98a0b0e8..290820ec 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -591,7 +591,7 @@ def generate_layers_metrics(
 
     model_root_folder = os.path.join(default_path, model_prefix)
     model_thresholds_folder = os.path.join(default_path, model_prefix, mode)
-    layer_io = os.join(model_thresholds_folder, "layers-input-output-logs")
+    layer_io = os.path.join(model_thresholds_folder, "layers-input-output-logs")
 
     if not output_path or not os.path.exists(output_path):
         os.makedirs(model_thresholds_folder)

From f54ed94a6f783d585547c0b6a41d6a3c7c009eb5 Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 24 Jul 2025 15:02:27 -0300
Subject: [PATCH 19/20] Fix linting

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 290820ec..9f5d6f47 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -88,7 +88,10 @@
 mode = args.mode
 output_path = args.output_path
 sharegpt_path = args.sharegpt_path
-default_path = os.getenv("DEEPVIEW_THRESHOLDS_FOLDERPATH", "/home/senuser/models/deepview/layerwise-thresholds")
+default_path = os.getenv(
+    "DEEPVIEW_THRESHOLDS_FOLDERPATH",
+    "/home/senuser/models/deepview/layerwise-thresholds",
+)
 
 logging.basicConfig(
     level=logging.DEBUG,
@@ -606,9 +609,7 @@ def generate_layers_metrics(
 
     logging.basicConfig(filename=layer_io)
 
-    logger.info(
-        f"model output path is {model_thresholds_folder}"
-    )
+    logger.info(f"model output path is {model_thresholds_folder}")
 
     logger.info(
         f"testing model_id-{model_id}, max_new_tokens-{max_new_token}, batch_size-{batch_size}, seq_length-{sequence_length}"
     )

From be03f3aba3237bd51740cbd61f4bc0cb96be35fc Mon Sep 17 00:00:00 2001
From: Flavia Beo
Date: Thu, 24 Jul 2025 17:29:06 -0300
Subject: [PATCH 20/20] Logging path

Signed-off-by: Flavia Beo
---
 scripts/generate_layers_metrics.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/generate_layers_metrics.py b/scripts/generate_layers_metrics.py
index 9f5d6f47..a40f3fd4 100644
--- a/scripts/generate_layers_metrics.py
+++ b/scripts/generate_layers_metrics.py
@@ -607,7 +607,13 @@ def generate_layers_metrics(
     if not os.path.exists(layer_io):
         os.makedirs(layer_io)
 
-    logging.basicConfig(filename=layer_io)
+    for handler in list(logger.handlers):
+        if isinstance(handler, logging.FileHandler):
+            logger.removeHandler(handler)
+            handler.close()  # Close the file handle
+
+    file_handler = logging.FileHandler(os.path.join(layer_io, "layers-io.log"))
+    logger.addHandler(file_handler)
 
     logger.info(f"model output path is {model_thresholds_folder}")
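
A note on the capture step these patches exercise: the body of `__register_call_layers` is not shown in any of the diffs above. As a rough, illustrative sketch of the underlying technique (registering forward hooks, collecting per-layer outputs from two runs, then scoring them the way `tensor_cos_sim` and `tensor_abs_diff` do), assuming plain PyTorch modules and with hypothetical helper names that are not the script's actual API:

import torch
import torch.nn as nn

def capture_layer_outputs(model: nn.Module, inputs: torch.Tensor) -> dict:
    """Run one forward pass and record each submodule's output by name."""
    outputs, handles = {}, []
    for name, module in model.named_modules():
        if name:  # skip the root module itself
            handles.append(module.register_forward_hook(
                lambda _mod, _in, out, name=name: outputs.setdefault(name, out)
            ))
    with torch.no_grad():
        model(inputs)
    for handle in handles:
        handle.remove()  # always detach hooks once the pass is done
    return outputs

# Compare the same layers across two runs (e.g., CPU float32 vs CUDA float16).
model = nn.Sequential(nn.Linear(8, 8), nn.ReLU(), nn.Linear(8, 4))
x = torch.randn(2, 8)
reference = capture_layer_outputs(model, x)
candidate = capture_layer_outputs(model, x)  # stand-in for the second-device run
for name, ref in reference.items():
    out = candidate[name]
    cos = torch.nn.functional.cosine_similarity(ref.flatten(), out.flatten(), dim=0)
    diff = (ref - out).abs().mean()
    print(f"{name}: cos_sim={cos:.4f} abs_diff={diff:.6f}")

In the script itself the two capture passes come from separate CPU and CUDA model instances, and the resulting per-layer cosine-similarity and absolute-difference values are what get written to the per-layer .csv threshold files.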