From cec04076d86947681af950346bbfcbac60642b9c Mon Sep 17 00:00:00 2001
From: Michael Choi <michael.choi@scale.com>
Date: Fri, 11 Oct 2024 00:47:51 +0000
Subject: [PATCH 1/3] Add hardware spec to client

---
 clients/python/llmengine/completion.py        | 16 ++++++++++++
 .../llmengine/data_types/batch_completion.py  | 25 +++++++++++++++++++
 2 files changed, 41 insertions(+)

diff --git a/clients/python/llmengine/completion.py b/clients/python/llmengine/completion.py
index 29617f26..c6b68b07 100644
--- a/clients/python/llmengine/completion.py
+++ b/clients/python/llmengine/completion.py
@@ -16,6 +16,12 @@
     ToolConfig,
 )
 
+from clients.python.llmengine.data_types.rest import (
+    CpuSpecificationType,
+    GpuType,
+    StorageSpecificationType,
+)
+
 COMPLETION_TIMEOUT = 300
 HTTP_TIMEOUT = 60
 
@@ -486,6 +492,11 @@ def batch_create(
         priority: Optional[str] = None,
         use_v2: bool = False,
         tool_config: Optional[ToolConfig] = None,
+        cpus: Optional[CpuSpecificationType] = None,
+        gpus: Optional[int] = None,
+        memory: Optional[StorageSpecificationType] = None,
+        gpu_type: Optional[GpuType] = None,
+        storage: Optional[StorageSpecificationType] = None,
         request_headers: Optional[Dict[str, str]] = None,
     ) -> Union[CreateBatchCompletionsV1Response, CreateBatchCompletionsV2Response]:
         """
@@ -636,6 +647,11 @@ def batch_create(
                 max_runtime_sec=max_runtime_sec,
                 tool_config=tool_config,
                 priority=priority,
+                cpus=cpus,
+                gpus=gpus,
+                memory=memory,
+                gpu_type=gpu_type,
+                storage=storage,
             ).dict()
             response = cls.post_sync(
                 resource_name="v2/batch-completions",
diff --git a/clients/python/llmengine/data_types/batch_completion.py b/clients/python/llmengine/data_types/batch_completion.py
index 6c14fcce..043cab3c 100644
--- a/clients/python/llmengine/data_types/batch_completion.py
+++ b/clients/python/llmengine/data_types/batch_completion.py
@@ -3,6 +3,12 @@
 
 from typing_extensions import TypeAlias
 
+from clients.python.llmengine.data_types.rest import (
+    CpuSpecificationType,
+    GpuType,
+    StorageSpecificationType,
+)
+
 from .chat_completion import ChatCompletionV2Request, ChatCompletionV2Response
 from .completion import CompletionOutput, CompletionV2Request, CompletionV2Response
 from .pydantic_types import BaseModel, Field
@@ -105,6 +111,25 @@ class BatchCompletionsRequestBase(BaseModel):
 NOTE: this config is highly experimental and signature will change significantly in future iterations.""",
     )
 
+    cpus: Optional[CpuSpecificationType] = Field(
+        default=None, description="CPUs to use for the batch inference."
+    )
+    gpus: Optional[int] = Field(
+        default=None, description="Number of GPUs to use for the batch inference."
+    )
+    memory: Optional[StorageSpecificationType] = Field(
+        default=None, description="Amount of memory to use for the batch inference."
+    )
+    gpu_type: Optional[GpuType] = Field(
+        default=None, description="GPU type to use for the batch inference."
+    )
+    storage: Optional[StorageSpecificationType] = Field(
+        default=None, description="Storage to use for the batch inference."
+    )
+    nodes_per_worker: Optional[int] = Field(
+        default=None, description="Number of nodes per worker for the batch inference."
+    )
+
 
 # V1 DTOs for batch completions
 CompletionV1Output = CompletionOutput

From e895a13926ca249c1558bf1dd4fdb5be139f79d6 Mon Sep 17 00:00:00 2001
From: Michael Choi <michael.choi@scale.com>
Date: Fri, 11 Oct 2024 14:18:40 +0000
Subject: [PATCH 2/3] Fix hardware spec import in batch_completion.py and bump version to beta41

---
 clients/python/llmengine/__init__.py                    | 2 +-
 clients/python/llmengine/data_types/batch_completion.py | 7 +------
 clients/python/pyproject.toml                           | 2 +-
 clients/python/setup.py                                 | 2 +-
 4 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py
index 21bed5ce..10c2788d 100644
--- a/clients/python/llmengine/__init__.py
+++ b/clients/python/llmengine/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.0.0beta40"
+__version__ = "0.0.0beta41"
 
 import os
 from typing import Sequence
diff --git a/clients/python/llmengine/data_types/batch_completion.py b/clients/python/llmengine/data_types/batch_completion.py
index 043cab3c..d72a3e82 100644
--- a/clients/python/llmengine/data_types/batch_completion.py
+++ b/clients/python/llmengine/data_types/batch_completion.py
@@ -3,15 +3,10 @@
 
 from typing_extensions import TypeAlias
 
-from clients.python.llmengine.data_types.rest import (
-    CpuSpecificationType,
-    GpuType,
-    StorageSpecificationType,
-)
-
 from .chat_completion import ChatCompletionV2Request, ChatCompletionV2Response
 from .completion import CompletionOutput, CompletionV2Request, CompletionV2Response
 from .pydantic_types import BaseModel, Field
+from .rest import CpuSpecificationType, GpuType, StorageSpecificationType
 
 
 # Common DTOs for batch completions
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index 9f963abb..4e5b6d85 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scale-llm-engine"
-version = "0.0.0.beta40"
+version = "0.0.0.beta41"
 description = "Scale LLM Engine Python client"
 license = "Apache-2.0"
 authors = ["Phil Chen <phil.chen@scale.com>"]
diff --git a/clients/python/setup.py b/clients/python/setup.py
index ea6c5e02..d327f31f 100644
--- a/clients/python/setup.py
+++ b/clients/python/setup.py
@@ -3,7 +3,7 @@
 setup(
     name="scale-llm-engine",
     python_requires=">=3.8",
-    version="0.0.0.beta40",
+    version="0.0.0.beta41",
     packages=find_packages(),
     package_data={"llmengine": ["py.typed"]},
 )

From 98b6ea6c37934c4aaf167723d7624c7a0bd40743 Mon Sep 17 00:00:00 2001
From: Michael Choi <michael.choi@scale.com>
Date: Fri, 11 Oct 2024 14:24:01 +0000
Subject: [PATCH 3/3] Fix hardware spec import in completion.py and bump version to beta42

---
 clients/python/llmengine/__init__.py   | 2 +-
 clients/python/llmengine/completion.py | 7 ++-----
 clients/python/pyproject.toml          | 2 +-
 clients/python/setup.py                | 2 +-
 4 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py
index 10c2788d..86f99642 100644
--- a/clients/python/llmengine/__init__.py
+++ b/clients/python/llmengine/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.0.0beta41"
+__version__ = "0.0.0beta42"
 
 import os
 from typing import Sequence
diff --git a/clients/python/llmengine/completion.py b/clients/python/llmengine/completion.py
index c6b68b07..8f972e91 100644
--- a/clients/python/llmengine/completion.py
+++ b/clients/python/llmengine/completion.py
@@ -7,19 +7,16 @@
     CompletionStreamV1Request,
     CompletionSyncResponse,
     CompletionSyncV1Request,
+    CpuSpecificationType,
     CreateBatchCompletionsModelConfig,
     CreateBatchCompletionsV1Request,
     CreateBatchCompletionsV1RequestContent,
     CreateBatchCompletionsV1Response,
     CreateBatchCompletionsV2Request,
     CreateBatchCompletionsV2Response,
-    ToolConfig,
-)
-
-from clients.python.llmengine.data_types.rest import (
-    CpuSpecificationType,
     GpuType,
     StorageSpecificationType,
+    ToolConfig,
 )
 
 COMPLETION_TIMEOUT = 300
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
index 4e5b6d85..9a150429 100644
--- a/clients/python/pyproject.toml
+++ b/clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scale-llm-engine"
-version = "0.0.0.beta41"
+version = "0.0.0.beta42"
 description = "Scale LLM Engine Python client"
 license = "Apache-2.0"
 authors = ["Phil Chen <phil.chen@scale.com>"]
diff --git a/clients/python/setup.py b/clients/python/setup.py
index d327f31f..a1fc8bee 100644
--- a/clients/python/setup.py
+++ b/clients/python/setup.py
@@ -3,7 +3,7 @@
 setup(
     name="scale-llm-engine",
     python_requires=">=3.8",
-    version="0.0.0.beta41",
+    version="0.0.0.beta42",
     packages=find_packages(),
     package_data={"llmengine": ["py.typed"]},
 )