From 0571d047e46dd596f5b996bbf787279193cb80a6 Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Thu, 13 Feb 2025 03:58:03 +0000
Subject: [PATCH 01/10] DeepSeek performance improvements

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 benchmarks/kernels/benchmark_moe.py           |  16 +-
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 ++++++++++++++
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ++++++++++++++++++
 .../layers/quantization/utils/fp8_utils.py    |  48 ++---
 51 files changed, 7169 insertions(+), 31 deletions(-)
 create mode 100644 vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json

diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py
index a4a45c9cbff2..6b5d5454ee15 100644
--- a/benchmarks/kernels/benchmark_moe.py
+++ b/benchmarks/kernels/benchmark_moe.py
@@ -208,12 +208,12 @@ def get_configs_compute_bound(use_fp16) -> List[Dict[str, int]]:
 
 
 def prune_rocm_search_space(num_tokens, shard_intermediate_size, hidden_size,
-                            search_space, is_fp16):
+                            search_space, is_fp16, topk):
     N1, K1 = shard_intermediate_size, hidden_size
     N2, K2 = hidden_size, shard_intermediate_size // 2
-    pruned_space_1 = prune_rocm_configs(num_tokens * 2, N1, K1, search_space,
+    pruned_space_1 = prune_rocm_configs(num_tokens * topk, N1, K1, search_space,
                                         is_fp16)
-    pruned_space_2 = prune_rocm_configs(num_tokens * 2, N2, K2, search_space,
+    pruned_space_2 = prune_rocm_configs(num_tokens * topk, N2, K2, search_space,
                                         is_fp16)
     search_space = merge_unique_dicts(pruned_space_1, pruned_space_2)
     return search_space
@@ -380,7 +380,7 @@ def tune(
             search_space = prune_rocm_search_space(num_tokens,
                                                    shard_intermediate_size,
                                                    hidden_size, search_space,
-                                                   is_fp16)
+                                                   is_fp16, topk)
 
         with torch.cuda.device(self.device_id):
             for config in tqdm(search_space):
@@ -437,7 +437,7 @@ def sort_config(config: BenchmarkConfig) -> BenchmarkConfig:
 def save_configs(configs: Dict[int, BenchmarkConfig], num_experts: int,
                  shard_intermediate_size: int, hidden_size: int, topk: int,
                  dtype: torch.dtype, use_fp8_w8a8: bool,
-                 use_int8_w8a16: bool) -> None:
+                 use_int8_w8a16: bool, block_quant_shape: List[int]) -> None:
     dtype_str = get_config_dtype_str(dtype,
                                      use_int8_w8a16=use_int8_w8a16,
                                      use_fp8_w8a8=use_fp8_w8a8)
@@ -445,7 +445,7 @@ def save_configs(configs: Dict[int, BenchmarkConfig], num_experts: int,
     # NOTE(woosuk): The current naming convention uses w2.shape[2], which
     # is the intermediate size after silu_and_mul.
     filename = get_config_file_name(num_experts, shard_intermediate_size // 2,
-                                    dtype_str)
+                                    dtype_str, block_quant_shape)
 
     print(f"Writing best config to {filename}...")
     with open(filename, "w") as f:
@@ -458,6 +458,7 @@ def main(args: argparse.Namespace):
 
     config = AutoConfig.from_pretrained(
         args.model, trust_remote_code=args.trust_remote_code)
+    block_quant_shape = None
     if config.architectures[0] == "DbrxForCausalLM":
         E = config.ffn_config.moe_num_experts
         topk = config.ffn_config.moe_top_k
@@ -473,6 +474,7 @@ def main(args: argparse.Namespace):
         topk = config.num_experts_per_tok
         intermediate_size = config.moe_intermediate_size
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
+        block_quant_shape = [128, 128]
     else:
         # Default: Mixtral.
         E = config.num_local_experts
@@ -523,7 +525,7 @@ def _distribute(method: str, inputs: List[Any]) -> List[Any]:
             for M, config in zip(batch_sizes, configs)
         }
         save_configs(best_configs, E, shard_intermediate_size, hidden_size,
-                     topk, dtype, use_fp8_w8a8, use_int8_w8a16)
+                     topk, dtype, use_fp8_w8a8, use_int8_w8a16, block_quant_shape)
         end = time.time()
         print(f"Tuning took {end - start:.2f} seconds")
     else:
diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..2b1167fc71e2
--- /dev/null
+++ b/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 32,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "2": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 16,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 2,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "4": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "8": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "16": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 2,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "24": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 128,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "32": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 2,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "48": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 2,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "64": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 2,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "96": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "128": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 2,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "256": {
+        "BLOCK_SIZE_M": 16,
+        "BLOCK_SIZE_N": 64,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 4,
+        "num_warps": 4,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "512": {
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    },
+    "1024": {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 256,
+        "BLOCK_SIZE_K": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 8,
+        "num_stages": 2,
+        "waves_per_eu": 0
+    }
+}
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..d1d2f6cfea80
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..e9a50e1d651f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..44fe3be6e468
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..b3bf9ea26bee
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..0532845fa352
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..defaacb32030
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..0aa55c156b30
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..987c8f600ea1
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..e9a26c62493a
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..b3ed43aafbd0
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..57f8e00d75d8
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..03e8235353c7
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..901f7cef7483
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..1a457b92a0ba
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..eaa079feb3a5
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..4415cc9d0bfa
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..2b60a1c67eda
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..7c039b409acb
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..18573585c2a8
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..5c604b9b6d9a
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..298b704da6a6
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..b4d25aef96ec
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..9e4d91bbbc17
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..634c1bfab62a
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..f9a454e7fcec
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..19452dfe77b8
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..00166252e4a3
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..e6d910735f3a
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..d4624bd8f28c
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..c298da80a937
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..7303c2166213
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..cb993c878fc9
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..616536c9ac62
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..56d3e1feea23
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..b4be351e6295
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..9cdff134dba1
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..006fd458e7e9
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..6f9bd755cdad
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..850fac7f3f8f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..c7122d3b960b
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..92892b065212
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..3cea21b4d722
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..b08fa8c54d5f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..a8141f535bcf
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..b7882b061d15
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..c9566d713260
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..b971426f3435
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
new file mode 100644
index 000000000000..e4716875871f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index 9895537c219a..8a70b2d56243 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -446,14 +446,14 @@ def get_w8a8_block_fp8_configs(N: int, K: int, block_n: int,
     return None
 
 
-def w8a8_block_fp8_matmul(
-    A: torch.Tensor,
-    B: torch.Tensor,
-    As: torch.Tensor,
-    Bs: torch.Tensor,
-    block_size: List[int],
-    output_dtype: torch.dtype = torch.float16,
-) -> torch.Tensor:
+def w8a8_block_fp8_matmul(A: torch.Tensor,
+                          B: torch.Tensor,
+                          As: torch.Tensor,
+                          Bs: torch.Tensor,
+                          block_size: List[int],
+                          output_dtype: torch.dtype = torch.float16,
+                          tune_config=None,
+                          use_default_config=False) -> torch.Tensor:
     """This function performs matrix multiplication with block-wise
     quantization.
     It takes two input tensors `A` and `B` with scales `As` and `Bs`.
@@ -485,22 +485,22 @@ def w8a8_block_fp8_matmul(
     C_shape = A.shape[:-1] + (N, )
     C = A.new_empty(C_shape, dtype=output_dtype)
 
-    configs = get_w8a8_block_fp8_configs(N, K, block_size[0], block_size[1])
-    if configs:
-        # Get the optimal config if there is one
-        config = configs[min(configs.keys(), key=lambda x: abs(x - M))]
-    else:
-        # Default config
-        # Block-wise quant: BLOCK_SIZE_N must be divisible by block_size[0]
-        # BLOCK_SIZE_K must be divisible by block_size[1]
-        config = {
-            "BLOCK_SIZE_M": 64,
-            "BLOCK_SIZE_N": block_size[0],
-            "BLOCK_SIZE_K": block_size[1],
-            "GROUP_SIZE_M": 32,
-            "num_warps": 4,
-            "num_stages": 2,
-        }
+    default_config = {
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": block_size[0],
+        "BLOCK_SIZE_K": block_size[1],
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4,
+        "num_stages": 2,
+    }
+
+    config = default_config if use_default_config else tune_config
+    if config is None:
+        configs = get_w8a8_block_fp8_configs(N, K, block_size[0],
+                                             block_size[1])
+        config = configs[min(
+            configs.keys(),
+            key=lambda x: abs(x - M))] if configs else default_config
 
     def grid(META):
         return (triton.cdiv(M, META["BLOCK_SIZE_M"]) *

From 974e422c56f260d7ab0057c1574ecde6c46d7e16 Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Thu, 13 Feb 2025 04:11:36 +0000
Subject: [PATCH 02/10] revert fp8_utils.py

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 .../layers/quantization/utils/fp8_utils.py    | 48 +++++++++----------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/vllm/model_executor/layers/quantization/utils/fp8_utils.py b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
index 8a70b2d56243..9895537c219a 100644
--- a/vllm/model_executor/layers/quantization/utils/fp8_utils.py
+++ b/vllm/model_executor/layers/quantization/utils/fp8_utils.py
@@ -446,14 +446,14 @@ def get_w8a8_block_fp8_configs(N: int, K: int, block_n: int,
     return None
 
 
-def w8a8_block_fp8_matmul(A: torch.Tensor,
-                          B: torch.Tensor,
-                          As: torch.Tensor,
-                          Bs: torch.Tensor,
-                          block_size: List[int],
-                          output_dtype: torch.dtype = torch.float16,
-                          tune_config=None,
-                          use_default_config=False) -> torch.Tensor:
+def w8a8_block_fp8_matmul(
+    A: torch.Tensor,
+    B: torch.Tensor,
+    As: torch.Tensor,
+    Bs: torch.Tensor,
+    block_size: List[int],
+    output_dtype: torch.dtype = torch.float16,
+) -> torch.Tensor:
     """This function performs matrix multiplication with block-wise
     quantization.
     It takes two input tensors `A` and `B` with scales `As` and `Bs`.
@@ -485,22 +485,22 @@ def w8a8_block_fp8_matmul(A: torch.Tensor,
     C_shape = A.shape[:-1] + (N, )
     C = A.new_empty(C_shape, dtype=output_dtype)
 
-    default_config = {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": block_size[0],
-        "BLOCK_SIZE_K": block_size[1],
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4,
-        "num_stages": 2,
-    }
-
-    config = default_config if use_default_config else tune_config
-    if config is None:
-        configs = get_w8a8_block_fp8_configs(N, K, block_size[0],
-                                             block_size[1])
-        config = configs[min(
-            configs.keys(),
-            key=lambda x: abs(x - M))] if configs else default_config
+    configs = get_w8a8_block_fp8_configs(N, K, block_size[0], block_size[1])
+    if configs:
+        # Get the optimal config if there is one
+        config = configs[min(configs.keys(), key=lambda x: abs(x - M))]
+    else:
+        # Default config
+        # Block-wise quant: BLOCK_SIZE_N must be divisible by block_size[0]
+        # BLOCK_SIZE_K must be divisible by block_size[1]
+        config = {
+            "BLOCK_SIZE_M": 64,
+            "BLOCK_SIZE_N": block_size[0],
+            "BLOCK_SIZE_K": block_size[1],
+            "GROUP_SIZE_M": 32,
+            "num_warps": 4,
+            "num_stages": 2,
+        }
 
     def grid(META):
         return (triton.cdiv(M, META["BLOCK_SIZE_M"]) *

From defd28a0bf171c8083023a9aa1e3dd4867c67671 Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Thu, 13 Feb 2025 04:23:09 +0000
Subject: [PATCH 03/10] yapf

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 benchmarks/kernels/benchmark_moe.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py
index 6b5d5454ee15..b75d8dc67ff6 100644
--- a/benchmarks/kernels/benchmark_moe.py
+++ b/benchmarks/kernels/benchmark_moe.py
@@ -211,10 +211,10 @@ def prune_rocm_search_space(num_tokens, shard_intermediate_size, hidden_size,
                             search_space, is_fp16, topk):
     N1, K1 = shard_intermediate_size, hidden_size
     N2, K2 = hidden_size, shard_intermediate_size // 2
-    pruned_space_1 = prune_rocm_configs(num_tokens * topk, N1, K1, search_space,
-                                        is_fp16)
-    pruned_space_2 = prune_rocm_configs(num_tokens * topk, N2, K2, search_space,
-                                        is_fp16)
+    pruned_space_1 = prune_rocm_configs(num_tokens * topk, N1, K1,
+                                        search_space, is_fp16)
+    pruned_space_2 = prune_rocm_configs(num_tokens * topk, N2, K2,
+                                        search_space, is_fp16)
     search_space = merge_unique_dicts(pruned_space_1, pruned_space_2)
     return search_space
 
@@ -436,8 +436,8 @@ def sort_config(config: BenchmarkConfig) -> BenchmarkConfig:
 
 def save_configs(configs: Dict[int, BenchmarkConfig], num_experts: int,
                  shard_intermediate_size: int, hidden_size: int, topk: int,
-                 dtype: torch.dtype, use_fp8_w8a8: bool,
-                 use_int8_w8a16: bool, block_quant_shape: List[int]) -> None:
+                 dtype: torch.dtype, use_fp8_w8a8: bool, use_int8_w8a16: bool,
+                 block_quant_shape: List[int]) -> None:
     dtype_str = get_config_dtype_str(dtype,
                                      use_int8_w8a16=use_int8_w8a16,
                                      use_fp8_w8a8=use_fp8_w8a8)
@@ -525,7 +525,8 @@ def _distribute(method: str, inputs: List[Any]) -> List[Any]:
             for M, config in zip(batch_sizes, configs)
         }
         save_configs(best_configs, E, shard_intermediate_size, hidden_size,
-                     topk, dtype, use_fp8_w8a8, use_int8_w8a16, block_quant_shape)
+                     topk, dtype, use_fp8_w8a8, use_int8_w8a16,
+                     block_quant_shape)
         end = time.time()
         print(f"Tuning took {end - start:.2f} seconds")
     else:

From 4944224de5fed8d66f34504f0c2f44edbec7a37c Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Thu, 13 Feb 2025 17:32:48 +0000
Subject: [PATCH 04/10] remove space from file names

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 128 --------------
 ...dtype=fp8_w8a8,block_shape=[128, 128].json | 164 ------------------
 48 files changed, 7008 deletions(-)
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json

diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index d1d2f6cfea80..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index e9a50e1d651f..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 44fe3be6e468..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index b3bf9ea26bee..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 0532845fa352..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index defaacb32030..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 128,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 0aa55c156b30..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 987c8f600ea1..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index e9a26c62493a..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index b3ed43aafbd0..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 57f8e00d75d8..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 03e8235353c7..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 901f7cef7483..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 1a457b92a0ba..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index eaa079feb3a5..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 4415cc9d0bfa..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 2b60a1c67eda..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 7c039b409acb..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 18573585c2a8..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 5c604b9b6d9a..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 298b704da6a6..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index b4d25aef96ec..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 128,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 128,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 9e4d91bbbc17..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 634c1bfab62a..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index f9a454e7fcec..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 19452dfe77b8..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 00166252e4a3..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index e6d910735f3a..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index d4624bd8f28c..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index c298da80a937..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 7303c2166213..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index cb993c878fc9..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 616536c9ac62..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 56d3e1feea23..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 128,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 128,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index b4be351e6295..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 9cdff134dba1..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 006fd458e7e9..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 6f9bd755cdad..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 850fac7f3f8f..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index c7122d3b960b..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 92892b065212..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index 3cea21b4d722..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index b08fa8c54d5f..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index a8141f535bcf..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 256,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index b7882b061d15..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index c9566d713260..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index b971426f3435..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
deleted file mode 100644
index e4716875871f..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json	
+++ /dev/null
@@ -1,164 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "kpack": 1,
-        "matrix_instr_nonkdim": 16,
-        "num_warps": 4
-    }
-}
\ No newline at end of file

From 34e6b83537f3f486f56985a9ffa329111e7a51b7 Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Thu, 13 Feb 2025 17:33:45 +0000
Subject: [PATCH 05/10] remove white space from file names

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ++++++++++++++
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 164 ++++++++++++++++++
 48 files changed, 7008 insertions(+)
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 create mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json

diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..d1d2f6cfea80
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..e9a50e1d651f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..44fe3be6e468
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..b3bf9ea26bee
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..0532845fa352
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..defaacb32030
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..0aa55c156b30
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..987c8f600ea1
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..e9a26c62493a
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..b3ed43aafbd0
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..57f8e00d75d8
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..03e8235353c7
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..901f7cef7483
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..1a457b92a0ba
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..eaa079feb3a5
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..4415cc9d0bfa
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..2b60a1c67eda
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..7c039b409acb
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..18573585c2a8
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..5c604b9b6d9a
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..298b704da6a6
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..b4d25aef96ec
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..9e4d91bbbc17
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..634c1bfab62a
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..f9a454e7fcec
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..19452dfe77b8
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..00166252e4a3
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..e6d910735f3a
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..d4624bd8f28c
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..c298da80a937
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..7303c2166213
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..cb993c878fc9
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..616536c9ac62
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..56d3e1feea23
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 128,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..b4be351e6295
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..9cdff134dba1
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..006fd458e7e9
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..6f9bd755cdad
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..850fac7f3f8f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..c7122d3b960b
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..92892b065212
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..3cea21b4d722
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..b08fa8c54d5f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 64,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 256,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..a8141f535bcf
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 32,
+        "BLOCK_SIZE_M": 256,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..b7882b061d15
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..c9566d713260
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..b971426f3435
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,128 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 256,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 1,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 128,
+        "GROUP_SIZE_M": 32,
+        "num_warps": 4
+    }
+}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
new file mode 100644
index 000000000000..e4716875871f
--- /dev/null
+++ b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
@@ -0,0 +1,164 @@
+{
+    "1": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "8": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "16": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "24": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "32": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 16,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "48": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 8,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "64": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 32,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "96": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 32,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "128": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 32,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "256": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "512": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1024": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "1536": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "2048": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "3072": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    },
+    "4096": {
+        "BLOCK_SIZE_K": 128,
+        "BLOCK_SIZE_M": 64,
+        "BLOCK_SIZE_N": 64,
+        "GROUP_SIZE_M": 1,
+        "kpack": 1,
+        "matrix_instr_nonkdim": 16,
+        "num_warps": 4
+    }
+}
\ No newline at end of file

From c7abb8f0f6bcd2486ec98650b70921a2895f6cbe Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Thu, 13 Feb 2025 19:24:57 +0000
Subject: [PATCH 06/10] remove XHF tunings

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 24 files changed, 3072 deletions(-)
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
 delete mode 100644 vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json

diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index d1d2f6cfea80..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 44fe3be6e468..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 0532845fa352..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 0aa55c156b30..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index e9a26c62493a..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 57f8e00d75d8..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 901f7cef7483..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index eaa079feb3a5..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 2b60a1c67eda..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 18573585c2a8..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 298b704da6a6..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 9e4d91bbbc17..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index f9a454e7fcec..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 00166252e4a3..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index d4624bd8f28c..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 7303c2166213..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 616536c9ac62..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index b4be351e6295..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 006fd458e7e9..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 850fac7f3f8f..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 92892b065212..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index b08fa8c54d5f..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 32,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 64,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 64,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index b7882b061d15..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index b971426f3435..000000000000
--- a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "2": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "4": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "8": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "16": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "24": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "32": {
-        "BLOCK_SIZE_K": 256,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "48": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 4
-    },
-    "64": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 32,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "96": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 16,
-        "num_warps": 4
-    },
-    "128": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "256": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4
-    },
-    "512": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1024": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "1536": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "2048": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "3072": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    },
-    "4096": {
-        "BLOCK_SIZE_K": 128,
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 128,
-        "GROUP_SIZE_M": 32,
-        "num_warps": 4
-    }
-}
\ No newline at end of file

From 1b5690684de927341186fb68787b5f991f643db8 Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Thu, 13 Feb 2025 20:13:00 +0000
Subject: [PATCH 07/10] Remove space from config

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 ...stinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename vllm/model_executor/layers/fused_moe/configs/{E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json => E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)

diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json b/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128, 128].json
rename to vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json

From af0a44d33ebdba40c1d0e17413646b90959b210c Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Thu, 13 Feb 2025 20:33:00 +0000
Subject: [PATCH 08/10] Remove changes to benchmark_moe.py

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 benchmarks/kernels/benchmark_moe.py | 23 ++++++++++-------------
 1 file changed, 10 insertions(+), 13 deletions(-)

diff --git a/benchmarks/kernels/benchmark_moe.py b/benchmarks/kernels/benchmark_moe.py
index b75d8dc67ff6..a4a45c9cbff2 100644
--- a/benchmarks/kernels/benchmark_moe.py
+++ b/benchmarks/kernels/benchmark_moe.py
@@ -208,13 +208,13 @@ def get_configs_compute_bound(use_fp16) -> List[Dict[str, int]]:
 
 
 def prune_rocm_search_space(num_tokens, shard_intermediate_size, hidden_size,
-                            search_space, is_fp16, topk):
+                            search_space, is_fp16):
     N1, K1 = shard_intermediate_size, hidden_size
     N2, K2 = hidden_size, shard_intermediate_size // 2
-    pruned_space_1 = prune_rocm_configs(num_tokens * topk, N1, K1,
-                                        search_space, is_fp16)
-    pruned_space_2 = prune_rocm_configs(num_tokens * topk, N2, K2,
-                                        search_space, is_fp16)
+    pruned_space_1 = prune_rocm_configs(num_tokens * 2, N1, K1, search_space,
+                                        is_fp16)
+    pruned_space_2 = prune_rocm_configs(num_tokens * 2, N2, K2, search_space,
+                                        is_fp16)
     search_space = merge_unique_dicts(pruned_space_1, pruned_space_2)
     return search_space
 
@@ -380,7 +380,7 @@ def tune(
             search_space = prune_rocm_search_space(num_tokens,
                                                    shard_intermediate_size,
                                                    hidden_size, search_space,
-                                                   is_fp16, topk)
+                                                   is_fp16)
 
         with torch.cuda.device(self.device_id):
             for config in tqdm(search_space):
@@ -436,8 +436,8 @@ def sort_config(config: BenchmarkConfig) -> BenchmarkConfig:
 
 def save_configs(configs: Dict[int, BenchmarkConfig], num_experts: int,
                  shard_intermediate_size: int, hidden_size: int, topk: int,
-                 dtype: torch.dtype, use_fp8_w8a8: bool, use_int8_w8a16: bool,
-                 block_quant_shape: List[int]) -> None:
+                 dtype: torch.dtype, use_fp8_w8a8: bool,
+                 use_int8_w8a16: bool) -> None:
     dtype_str = get_config_dtype_str(dtype,
                                      use_int8_w8a16=use_int8_w8a16,
                                      use_fp8_w8a8=use_fp8_w8a8)
@@ -445,7 +445,7 @@ def save_configs(configs: Dict[int, BenchmarkConfig], num_experts: int,
     # NOTE(woosuk): The current naming convention uses w2.shape[2], which
     # is the intermediate size after silu_and_mul.
     filename = get_config_file_name(num_experts, shard_intermediate_size // 2,
-                                    dtype_str, block_quant_shape)
+                                    dtype_str)
 
     print(f"Writing best config to {filename}...")
     with open(filename, "w") as f:
@@ -458,7 +458,6 @@ def main(args: argparse.Namespace):
 
     config = AutoConfig.from_pretrained(
         args.model, trust_remote_code=args.trust_remote_code)
-    block_quant_shape = None
     if config.architectures[0] == "DbrxForCausalLM":
         E = config.ffn_config.moe_num_experts
         topk = config.ffn_config.moe_top_k
@@ -474,7 +473,6 @@ def main(args: argparse.Namespace):
         topk = config.num_experts_per_tok
         intermediate_size = config.moe_intermediate_size
         shard_intermediate_size = 2 * intermediate_size // args.tp_size
-        block_quant_shape = [128, 128]
     else:
         # Default: Mixtral.
         E = config.num_local_experts
@@ -525,8 +523,7 @@ def _distribute(method: str, inputs: List[Any]) -> List[Any]:
             for M, config in zip(batch_sizes, configs)
         }
         save_configs(best_configs, E, shard_intermediate_size, hidden_size,
-                     topk, dtype, use_fp8_w8a8, use_int8_w8a16,
-                     block_quant_shape)
+                     topk, dtype, use_fp8_w8a8, use_int8_w8a16)
         end = time.time()
         print(f"Tuning took {end - start:.2f} seconds")
     else:

From 80510047e9cda2ac551de500e72e3599fb2e1b50 Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Fri, 14 Feb 2025 21:23:02 +0000
Subject: [PATCH 09/10] remove moe tuning

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 ...,dtype=fp8_w8a8,block_shape=[128,128].json | 128 ------------------
 1 file changed, 128 deletions(-)
 delete mode 100644 vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json

diff --git a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
deleted file mode 100644
index 2b1167fc71e2..000000000000
--- a/vllm/model_executor/layers/fused_moe/configs/E=256,N=256,device_name=AMD_Instinct_MI300XHF_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
+++ /dev/null
@@ -1,128 +0,0 @@
-{
-    "1": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 32,
-        "BLOCK_SIZE_K": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "2": {
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 16,
-        "BLOCK_SIZE_K": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 2,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "4": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "8": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "16": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 2,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "24": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 128,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 4,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "32": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 2,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "48": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 2,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "64": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 2,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "96": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 4,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "128": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 256,
-        "GROUP_SIZE_M": 1,
-        "num_warps": 2,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "256": {
-        "BLOCK_SIZE_M": 16,
-        "BLOCK_SIZE_N": 64,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 4,
-        "num_warps": 4,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "512": {
-        "BLOCK_SIZE_M": 32,
-        "BLOCK_SIZE_N": 256,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 8,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    },
-    "1024": {
-        "BLOCK_SIZE_M": 64,
-        "BLOCK_SIZE_N": 256,
-        "BLOCK_SIZE_K": 128,
-        "GROUP_SIZE_M": 8,
-        "num_warps": 8,
-        "num_stages": 2,
-        "waves_per_eu": 0
-    }
-}

From 4fe44788fd8c2636eb7a741ebe5038fd2b46731e Mon Sep 17 00:00:00 2001
From: Randall Smith <Randall.Smith@amd.com>
Date: Fri, 14 Feb 2025 21:52:55 -0600
Subject: [PATCH 10/10] move tunings

Signed-off-by: Randall Smith <Randall.Smith@amd.com>
---
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 ...AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} | 0
 24 files changed, 0 insertions(+), 0 deletions(-)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)
 rename vllm/model_executor/layers/quantization/utils/configs/{N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json => N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json} (100%)

diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=1536,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=1536,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=2048,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=2304,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=24576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=256,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=3072,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=3072,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=32768,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=36864,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=4096,K=512,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=4608,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=512,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=576,K=7168,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1024,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=1152,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=128,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=16384,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=18432,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2048,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=2304,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=256,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=7168,K=8192,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
diff --git a/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json b/vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json
similarity index 100%
rename from vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X_OAM,dtype=fp8_w8a8,block_shape=[128,128].json
rename to vllm/model_executor/layers/quantization/utils/configs/N=8192,K=1536,device_name=AMD_Instinct_MI300X,dtype=fp8_w8a8,block_shape=[128,128].json