2 changes: 1 addition & 1 deletion clients/python/llmengine/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.0.0beta40"
+__version__ = "0.0.0beta42"
 
 import os
 from typing import Sequence
13 changes: 13 additions & 0 deletions clients/python/llmengine/completion.py
@@ -7,12 +7,15 @@
     CompletionStreamV1Request,
     CompletionSyncResponse,
     CompletionSyncV1Request,
+    CpuSpecificationType,
     CreateBatchCompletionsModelConfig,
     CreateBatchCompletionsV1Request,
     CreateBatchCompletionsV1RequestContent,
     CreateBatchCompletionsV1Response,
     CreateBatchCompletionsV2Request,
     CreateBatchCompletionsV2Response,
+    GpuType,
+    StorageSpecificationType,
     ToolConfig,
 )

@@ -486,6 +489,11 @@ def batch_create(
         priority: Optional[str] = None,
         use_v2: bool = False,
         tool_config: Optional[ToolConfig] = None,
+        cpus: Optional[CpuSpecificationType] = None,
+        gpus: Optional[int] = None,
+        memory: Optional[StorageSpecificationType] = None,
+        gpu_type: Optional[GpuType] = None,
+        storage: Optional[StorageSpecificationType] = None,
         request_headers: Optional[Dict[str, str]] = None,
     ) -> Union[CreateBatchCompletionsV1Response, CreateBatchCompletionsV2Response]:
         """
@@ -636,6 +644,11 @@ def batch_create(
                 max_runtime_sec=max_runtime_sec,
                 tool_config=tool_config,
                 priority=priority,
+                cpus=cpus,
+                gpus=gpus,
+                memory=memory,
+                gpu_type=gpu_type,
+                storage=storage,
             ).dict()
             response = cls.post_sync(
                 resource_name="v2/batch-completions",
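For context, a minimal usage sketch of the extended batch_create call. The import paths, the required arguments (output_data_path, model_config), and the GpuType member are assumptions inferred from this diff; the bucket paths and model name are hypothetical:

from llmengine import Completion
from llmengine.data_types import CreateBatchCompletionsModelConfig, GpuType

# Hypothetical paths and model name; cpus/gpus/memory/gpu_type/storage are the
# parameters added in this change, everything else follows the existing signature.
response = Completion.batch_create(
    output_data_path="s3://my-bucket/batch-output/",
    model_config=CreateBatchCompletionsModelConfig(
        model="llama-2-7b",  # hypothetical; other model-config fields left at assumed defaults
    ),
    input_data_path="s3://my-bucket/batch-input.json",
    use_v2=True,
    cpus=8,
    gpus=1,
    memory="64Gi",
    gpu_type=GpuType.NVIDIA_AMPERE_A10,  # assumed enum member
    storage="100Gi",
)
print(response)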
20 changes: 20 additions & 0 deletions clients/python/llmengine/data_types/batch_completion.py
@@ -6,6 +6,7 @@
 from .chat_completion import ChatCompletionV2Request, ChatCompletionV2Response
 from .completion import CompletionOutput, CompletionV2Request, CompletionV2Response
 from .pydantic_types import BaseModel, Field
+from .rest import CpuSpecificationType, GpuType, StorageSpecificationType
 
 
 # Common DTOs for batch completions
@@ -105,6 +106,25 @@ class BatchCompletionsRequestBase(BaseModel):
         NOTE: this config is highly experimental and signature will change significantly in future iterations.""",
     )
 
+    cpus: Optional[CpuSpecificationType] = Field(
+        default=None, description="CPUs to use for the batch inference."
+    )
+    gpus: Optional[int] = Field(
+        default=None, description="Number of GPUs to use for the batch inference."
+    )
+    memory: Optional[StorageSpecificationType] = Field(
+        default=None, description="Amount of memory to use for the batch inference."
+    )
+    gpu_type: Optional[GpuType] = Field(
+        default=None, description="GPU type to use for the batch inference."
+    )
+    storage: Optional[StorageSpecificationType] = Field(
+        default=None, description="Storage to use for the batch inference."
+    )
+    nodes_per_worker: Optional[int] = Field(
+        default=None, description="Number of nodes per worker for the batch inference."
+    )
+
 
 # V1 DTOs for batch completions
 CompletionV1Output = CompletionOutput
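Every new field on BatchCompletionsRequestBase is Optional with default None, so existing request payloads are unchanged unless a caller opts in. A small sketch; output_data_path as the only required base field, string-to-GpuType coercion, and the exclude_none serializer are all assumptions:

from llmengine.data_types.batch_completion import BatchCompletionsRequestBase

req = BatchCompletionsRequestBase(
    output_data_path="s3://my-bucket/out/",  # hypothetical; assumed required field
    gpus=1,
    gpu_type="nvidia-ampere-a10",  # assumed GpuType value, coerced by pydantic
    memory="32Gi",
)
# Unset resource fields stay None and can be dropped from the wire payload.
assert req.cpus is None and req.storage is None and req.nodes_per_worker is None
print(req.dict(exclude_none=True))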
2 changes: 1 addition & 1 deletion clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scale-llm-engine"
-version = "0.0.0.beta40"
+version = "0.0.0.beta42"
 description = "Scale LLM Engine Python client"
 license = "Apache-2.0"
 authors = ["Phil Chen <[email protected]>"]
2 changes: 1 addition & 1 deletion clients/python/setup.py
@@ -3,7 +3,7 @@
 setup(
     name="scale-llm-engine",
     python_requires=">=3.8",
-    version="0.0.0.beta40",
+    version="0.0.0.beta42",
     packages=find_packages(),
     package_data={"llmengine": ["py.typed"]},
 )