From cec04076d86947681af950346bbfcbac60642b9c Mon Sep 17 00:00:00 2001 From: Michael Choi Date: Fri, 11 Oct 2024 00:47:51 +0000 Subject: [PATCH 1/3] Add hardware spec to client --- clients/python/llmengine/completion.py | 16 ++++++++++++ .../llmengine/data_types/batch_completion.py | 25 +++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/clients/python/llmengine/completion.py b/clients/python/llmengine/completion.py index 29617f26..c6b68b07 100644 --- a/clients/python/llmengine/completion.py +++ b/clients/python/llmengine/completion.py @@ -16,6 +16,12 @@ ToolConfig, ) +from clients.python.llmengine.data_types.rest import ( + CpuSpecificationType, + GpuType, + StorageSpecificationType, +) + COMPLETION_TIMEOUT = 300 HTTP_TIMEOUT = 60 @@ -486,6 +492,11 @@ def batch_create( priority: Optional[str] = None, use_v2: bool = False, tool_config: Optional[ToolConfig] = None, + cpus: Optional[CpuSpecificationType] = None, + gpus: Optional[int] = None, + memory: Optional[StorageSpecificationType] = None, + gpu_type: Optional[GpuType] = None, + storage: Optional[StorageSpecificationType] = None, request_headers: Optional[Dict[str, str]] = None, ) -> Union[CreateBatchCompletionsV1Response, CreateBatchCompletionsV2Response]: """ @@ -636,6 +647,11 @@ def batch_create( max_runtime_sec=max_runtime_sec, tool_config=tool_config, priority=priority, + cpus=cpus, + gpus=gpus, + memory=memory, + gpu_type=gpu_type, + storage=storage, ).dict() response = cls.post_sync( resource_name="v2/batch-completions", diff --git a/clients/python/llmengine/data_types/batch_completion.py b/clients/python/llmengine/data_types/batch_completion.py index 6c14fcce..043cab3c 100644 --- a/clients/python/llmengine/data_types/batch_completion.py +++ b/clients/python/llmengine/data_types/batch_completion.py @@ -3,6 +3,12 @@ from typing_extensions import TypeAlias +from clients.python.llmengine.data_types.rest import ( + CpuSpecificationType, + GpuType, + StorageSpecificationType, +) + from .chat_completion import ChatCompletionV2Request, ChatCompletionV2Response from .completion import CompletionOutput, CompletionV2Request, CompletionV2Response from .pydantic_types import BaseModel, Field @@ -105,6 +111,25 @@ class BatchCompletionsRequestBase(BaseModel): NOTE: this config is highly experimental and signature will change significantly in future iterations.""", ) + cpus: Optional[CpuSpecificationType] = Field( + default=None, description="CPUs to use for the batch inference." + ) + gpus: Optional[int] = Field( + default=None, description="Number of GPUs to use for the batch inference." + ) + memory: Optional[StorageSpecificationType] = Field( + default=None, description="Amount of memory to use for the batch inference." + ) + gpu_type: Optional[GpuType] = Field( + default=None, description="GPU type to use for the batch inference." + ) + storage: Optional[StorageSpecificationType] = Field( + default=None, description="Storage to use for the batch inference." + ) + nodes_per_worker: Optional[int] = Field( + default=None, description="Number of nodes per worker for the batch inference." + ) + # V1 DTOs for batch completions CompletionV1Output = CompletionOutput From e895a13926ca249c1558bf1dd4fdb5be139f79d6 Mon Sep 17 00:00:00 2001 From: Michael Choi Date: Fri, 11 Oct 2024 14:18:40 +0000 Subject: [PATCH 2/3] fix import and update version --- clients/python/llmengine/__init__.py | 2 +- clients/python/llmengine/data_types/batch_completion.py | 7 +------ clients/python/pyproject.toml | 2 +- clients/python/setup.py | 2 +- 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py index 21bed5ce..10c2788d 100644 --- a/clients/python/llmengine/__init__.py +++ b/clients/python/llmengine/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.0.0beta40" +__version__ = "0.0.0beta41" import os from typing import Sequence diff --git a/clients/python/llmengine/data_types/batch_completion.py b/clients/python/llmengine/data_types/batch_completion.py index 043cab3c..d72a3e82 100644 --- a/clients/python/llmengine/data_types/batch_completion.py +++ b/clients/python/llmengine/data_types/batch_completion.py @@ -3,15 +3,10 @@ from typing_extensions import TypeAlias -from clients.python.llmengine.data_types.rest import ( - CpuSpecificationType, - GpuType, - StorageSpecificationType, -) - from .chat_completion import ChatCompletionV2Request, ChatCompletionV2Response from .completion import CompletionOutput, CompletionV2Request, CompletionV2Response from .pydantic_types import BaseModel, Field +from .rest import CpuSpecificationType, GpuType, StorageSpecificationType # Common DTOs for batch completions diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 9f963abb..4e5b6d85 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scale-llm-engine" -version = "0.0.0.beta40" +version = "0.0.0.beta41" description = "Scale LLM Engine Python client" license = "Apache-2.0" authors = ["Phil Chen "] diff --git a/clients/python/setup.py b/clients/python/setup.py index ea6c5e02..d327f31f 100644 --- a/clients/python/setup.py +++ b/clients/python/setup.py @@ -3,7 +3,7 @@ setup( name="scale-llm-engine", python_requires=">=3.8", - version="0.0.0.beta40", + version="0.0.0.beta41", packages=find_packages(), package_data={"llmengine": ["py.typed"]}, ) From 98b6ea6c37934c4aaf167723d7624c7a0bd40743 Mon Sep 17 00:00:00 2001 From: Michael Choi Date: Fri, 11 Oct 2024 14:24:01 +0000 Subject: [PATCH 3/3] fix import and update version --- clients/python/llmengine/__init__.py | 2 +- clients/python/llmengine/completion.py | 7 ++----- clients/python/pyproject.toml | 2 +- clients/python/setup.py | 2 +- 4 files changed, 5 insertions(+), 8 deletions(-) diff --git a/clients/python/llmengine/__init__.py b/clients/python/llmengine/__init__.py index 10c2788d..86f99642 100644 --- a/clients/python/llmengine/__init__.py +++ b/clients/python/llmengine/__init__.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -__version__ = "0.0.0beta41" +__version__ = "0.0.0beta42" import os from typing import Sequence diff --git a/clients/python/llmengine/completion.py b/clients/python/llmengine/completion.py index c6b68b07..8f972e91 100644 --- a/clients/python/llmengine/completion.py +++ b/clients/python/llmengine/completion.py @@ -7,19 +7,16 @@ CompletionStreamV1Request, CompletionSyncResponse, CompletionSyncV1Request, + CpuSpecificationType, CreateBatchCompletionsModelConfig, CreateBatchCompletionsV1Request, CreateBatchCompletionsV1RequestContent, CreateBatchCompletionsV1Response, CreateBatchCompletionsV2Request, CreateBatchCompletionsV2Response, - ToolConfig, -) - -from clients.python.llmengine.data_types.rest import ( - CpuSpecificationType, GpuType, StorageSpecificationType, + ToolConfig, ) COMPLETION_TIMEOUT = 300 diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index 4e5b6d85..9a150429 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "scale-llm-engine" -version = "0.0.0.beta41" +version = "0.0.0.beta42" description = "Scale LLM Engine Python client" license = "Apache-2.0" authors = ["Phil Chen "] diff --git a/clients/python/setup.py b/clients/python/setup.py index d327f31f..a1fc8bee 100644 --- a/clients/python/setup.py +++ b/clients/python/setup.py @@ -3,7 +3,7 @@ setup( name="scale-llm-engine", python_requires=">=3.8", - version="0.0.0.beta41", + version="0.0.0.beta42", packages=find_packages(), package_data={"llmengine": ["py.typed"]}, )