2 changes: 1 addition & 1 deletion clients/python/llmengine/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.0.0beta40"
+__version__ = "0.0.0beta42"
 
 import os
 from typing import Sequence
13 changes: 13 additions & 0 deletions clients/python/llmengine/completion.py
@@ -7,12 +7,15 @@
     CompletionStreamV1Request,
     CompletionSyncResponse,
     CompletionSyncV1Request,
+    CpuSpecificationType,
     CreateBatchCompletionsModelConfig,
     CreateBatchCompletionsV1Request,
     CreateBatchCompletionsV1RequestContent,
     CreateBatchCompletionsV1Response,
     CreateBatchCompletionsV2Request,
     CreateBatchCompletionsV2Response,
+    GpuType,
+    StorageSpecificationType,
     ToolConfig,
 )

@@ -486,6 +489,11 @@ def batch_create(
         priority: Optional[str] = None,
         use_v2: bool = False,
         tool_config: Optional[ToolConfig] = None,
+        cpus: Optional[CpuSpecificationType] = None,
+        gpus: Optional[int] = None,
+        memory: Optional[StorageSpecificationType] = None,
+        gpu_type: Optional[GpuType] = None,
+        storage: Optional[StorageSpecificationType] = None,
         request_headers: Optional[Dict[str, str]] = None,
     ) -> Union[CreateBatchCompletionsV1Response, CreateBatchCompletionsV2Response]:
         """
@@ -636,6 +644,11 @@ def batch_create(
                 max_runtime_sec=max_runtime_sec,
                 tool_config=tool_config,
                 priority=priority,
+                cpus=cpus,
+                gpus=gpus,
+                memory=memory,
+                gpu_type=gpu_type,
+                storage=storage,
             ).dict()
             response = cls.post_sync(
                 resource_name="v2/batch-completions",
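For context, a minimal usage sketch of the extended batch_create call. The import paths, the required arguments (output_data_path, model_config), and the GpuType member are assumptions inferred from this diff; the bucket paths and model name are hypothetical:

from llmengine import Completion
from llmengine.data_types import CreateBatchCompletionsModelConfig, GpuType

# Hypothetical paths and model name; cpus/gpus/memory/gpu_type/storage are the
# parameters added in this change, everything else follows the existing signature.
response = Completion.batch_create(
    output_data_path="s3://my-bucket/batch-output/",
    model_config=CreateBatchCompletionsModelConfig(
        model="llama-2-7b",  # hypothetical; other model-config fields left at assumed defaults
    ),
    input_data_path="s3://my-bucket/batch-input.json",
    use_v2=True,
    cpus=8,
    gpus=1,
    memory="64Gi",
    gpu_type=GpuType.NVIDIA_AMPERE_A10,  # assumed enum member
    storage="100Gi",
)
print(response)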
20 changes: 20 additions & 0 deletions clients/python/llmengine/data_types/batch_completion.py
@@ -6,6 +6,7 @@
 from .chat_completion import ChatCompletionV2Request, ChatCompletionV2Response
 from .completion import CompletionOutput, CompletionV2Request, CompletionV2Response
 from .pydantic_types import BaseModel, Field
+from .rest import CpuSpecificationType, GpuType, StorageSpecificationType
 
 
 # Common DTOs for batch completions
@@ -105,6 +106,25 @@ class BatchCompletionsRequestBase(BaseModel):
         NOTE: this config is highly experimental and signature will change significantly in future iterations.""",
     )
 
+    cpus: Optional[CpuSpecificationType] = Field(
+        default=None, description="CPUs to use for the batch inference."
+    )
+    gpus: Optional[int] = Field(
+        default=None, description="Number of GPUs to use for the batch inference."
+    )
+    memory: Optional[StorageSpecificationType] = Field(
+        default=None, description="Amount of memory to use for the batch inference."
+    )
+    gpu_type: Optional[GpuType] = Field(
+        default=None, description="GPU type to use for the batch inference."
+    )
+    storage: Optional[StorageSpecificationType] = Field(
+        default=None, description="Storage to use for the batch inference."
+    )
+    nodes_per_worker: Optional[int] = Field(
+        default=None, description="Number of nodes per worker for the batch inference."
+    )
+
 
 # V1 DTOs for batch completions
 CompletionV1Output = CompletionOutput
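Every new field on BatchCompletionsRequestBase is Optional with default None, so existing request payloads are unchanged unless a caller opts in. A small sketch; output_data_path as the only required base field, string-to-GpuType coercion, and the exclude_none serializer are all assumptions:

from llmengine.data_types.batch_completion import BatchCompletionsRequestBase

req = BatchCompletionsRequestBase(
    output_data_path="s3://my-bucket/out/",  # hypothetical; assumed required field
    gpus=1,
    gpu_type="nvidia-ampere-a10",  # assumed GpuType value, coerced by pydantic
    memory="32Gi",
)
# Unset resource fields stay None and can be dropped from the wire payload.
assert req.cpus is None and req.storage is None and req.nodes_per_worker is None
print(req.dict(exclude_none=True))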
2 changes: 1 addition & 1 deletion clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "scale-llm-engine"
-version = "0.0.0.beta40"
+version = "0.0.0.beta42"
 description = "Scale LLM Engine Python client"
 license = "Apache-2.0"
 authors = ["Phil Chen <[email protected]>"]
2 changes: 1 addition & 1 deletion clients/python/setup.py
@@ -3,7 +3,7 @@
 setup(
     name="scale-llm-engine",
     python_requires=">=3.8",
-    version="0.0.0.beta40",
+    version="0.0.0.beta42",
     packages=find_packages(),
     package_data={"llmengine": ["py.typed"]},
 )