From 42d771e5ac5c152df37cfdf8ece2c95976c9a22a Mon Sep 17 00:00:00 2001
From: Zhanwen Chen <phil.zhanwen.chen@gmail.com>
Date: Mon, 3 Mar 2025 10:07:09 -0500
Subject: [PATCH] Use math.prod instead of np.prod for trivial ops

Use Python 3.8 math.prod for lighter imports and overheads. See https://docs.python.org/3/library/math.html#math.prod
---
 vllm/worker/cache_engine.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/vllm/worker/cache_engine.py b/vllm/worker/cache_engine.py
index 3960392cf74e..004b4e4b757f 100644
--- a/vllm/worker/cache_engine.py
+++ b/vllm/worker/cache_engine.py
@@ -1,8 +1,8 @@
 # SPDX-License-Identifier: Apache-2.0
 """CacheEngine class for managing the KV cache."""
+from math import prod
 from typing import List
 
-import numpy as np
 import torch
 
 from vllm import envs
@@ -90,7 +90,7 @@ def _allocate_kv_cache(
             # NOTE this assumption currently only holds for MLA so we only apply
             # this optimization when `use_mla` is true
             entry_shape = kv_cache_shape[2:]
-            entry_size = np.prod(entry_shape)
+            entry_size = prod(entry_shape)
             alloc_entry_size = align_to_256bytes(entry_size, self.dtype)
             alloc_shape = (*kv_cache_shape[:2], alloc_entry_size)
         else: