diff --git a/sample/sagemaker/2017-07-24/service-2.json b/sample/sagemaker/2017-07-24/service-2.json
index 301512a..9dcb41a 100644
--- a/sample/sagemaker/2017-07-24/service-2.json
+++ b/sample/sagemaker/2017-07-24/service-2.json
@@ -12446,7 +12446,7 @@
},
"PlatformIdentifier":{
"shape":"PlatformIdentifier",
- "documentation":"
The platform identifier of the notebook instance runtime environment.
"
+ "documentation":"The platform identifier of the notebook instance runtime environment. The default value is notebook-al2-v2.
"
},
"InstanceMetadataServiceConfiguration":{
"shape":"InstanceMetadataServiceConfiguration",
@@ -21172,6 +21172,7 @@
},
"documentation":"The configuration parameters that specify the IAM roles assumed by the execution role of SageMaker (assumable roles) and the cluster instances or job execution environments (execution roles or runtime roles) to manage and access resources required for running Amazon EMR clusters or Amazon EMR Serverless applications.
"
},
+ "EnableCaching":{"type":"boolean"},
"EnableCapture":{"type":"boolean"},
"EnableInfraCheck":{
"type":"boolean",
@@ -24706,6 +24707,30 @@
"box":true,
"min":0
},
+ "InferenceComponentDataCacheConfig":{
+ "type":"structure",
+ "required":["EnableCaching"],
+ "members":{
+ "EnableCaching":{
+ "shape":"EnableCaching",
+ "documentation":"Sets whether the endpoint that hosts the inference component caches the model artifacts and container image.
With caching enabled, the endpoint caches this data in each instance that it provisions for the inference component. That way, the inference component deploys faster during the auto scaling process. If caching isn't enabled, the inference component takes longer to deploy because of the time it spends downloading the data.
",
+ "box":true
+ }
+ },
+ "documentation":"Settings that affect how the inference component caches data.
"
+ },
+ "InferenceComponentDataCacheConfigSummary":{
+ "type":"structure",
+ "required":["EnableCaching"],
+ "members":{
+ "EnableCaching":{
+ "shape":"EnableCaching",
+ "documentation":"Indicates whether the inference component caches model artifacts as part of the auto scaling process.
",
+ "box":true
+ }
+ },
+ "documentation":"Settings that affect how the inference component caches data.
"
+ },
"InferenceComponentDeploymentConfig":{
"type":"structure",
"required":["RollingUpdatePolicy"],
@@ -24811,6 +24836,10 @@
"BaseInferenceComponentName":{
"shape":"InferenceComponentName",
"documentation":"The name of an existing inference component that is to contain the inference component that you're creating with your request.
Specify this parameter only if your request is meant to create an adapter inference component. An adapter inference component contains the path to an adapter model. The purpose of the adapter model is to tailor the inference output of a base foundation model, which is hosted by the base inference component. The adapter inference component uses the compute resources that you assigned to the base inference component.
When you create an adapter inference component, use the Container parameter to specify the location of the adapter artifacts. In the parameter value, use the ArtifactUrl parameter of the InferenceComponentContainerSpecification data type.
Before you can create an adapter inference component, you must have an existing inference component that contains the foundation model that you want to adapt.
"
+ },
+ "DataCacheConfig":{
+ "shape":"InferenceComponentDataCacheConfig",
+ "documentation":"Settings that affect how the inference component caches data.
"
}
},
"documentation":"Details about the resources to deploy with this inference component, including the model, container, and compute resources.
"
@@ -24837,6 +24866,10 @@
"BaseInferenceComponentName":{
"shape":"InferenceComponentName",
"documentation":"The name of the base inference component that contains this inference component.
"
+ },
+ "DataCacheConfig":{
+ "shape":"InferenceComponentDataCacheConfigSummary",
+ "documentation":"Settings that affect how the inference component caches data.
"
}
},
"documentation":"Details about the resources that are deployed with this inference component.
"
@@ -36136,7 +36169,7 @@
},
"S3DataDistributionType":{
"shape":"ProcessingS3DataDistributionType",
- "documentation":"Whether to distribute the data from Amazon S3 to all processing instances with FullyReplicated, or whether the data from Amazon S3 is shared by Amazon S3 key, downloading one shard of data to each processing instance.
"
+ "documentation":"Whether to distribute the data from Amazon S3 to all processing instances with FullyReplicated, or whether the data from Amazon S3 is sharded by Amazon S3 key, downloading one shard of data to each processing instance.
"
},
"S3CompressionType":{
"shape":"ProcessingS3CompressionType",
diff --git a/src/sagemaker_core/main/code_injection/shape_dag.py b/src/sagemaker_core/main/code_injection/shape_dag.py
index ded29a6..0ade0b6 100644
--- a/src/sagemaker_core/main/code_injection/shape_dag.py
+++ b/src/sagemaker_core/main/code_injection/shape_dag.py
@@ -8365,6 +8365,14 @@
],
"type": "structure",
},
+ "InferenceComponentDataCacheConfig": {
+ "members": [{"name": "EnableCaching", "shape": "EnableCaching", "type": "boolean"}],
+ "type": "structure",
+ },
+ "InferenceComponentDataCacheConfigSummary": {
+ "members": [{"name": "EnableCaching", "shape": "EnableCaching", "type": "boolean"}],
+ "type": "structure",
+ },
"InferenceComponentDeploymentConfig": {
"members": [
{
@@ -8437,6 +8445,11 @@
"shape": "InferenceComponentName",
"type": "string",
},
+ {
+ "name": "DataCacheConfig",
+ "shape": "InferenceComponentDataCacheConfig",
+ "type": "structure",
+ },
],
"type": "structure",
},
@@ -8463,6 +8476,11 @@
"shape": "InferenceComponentName",
"type": "string",
},
+ {
+ "name": "DataCacheConfig",
+ "shape": "InferenceComponentDataCacheConfigSummary",
+ "type": "structure",
+ },
],
"type": "structure",
},
diff --git a/src/sagemaker_core/main/resources.py b/src/sagemaker_core/main/resources.py
index f7d879b..7e3ee68 100644
--- a/src/sagemaker_core/main/resources.py
+++ b/src/sagemaker_core/main/resources.py
@@ -22677,7 +22677,7 @@ def create(
default_code_repository: A Git repository to associate with the notebook instance as its default code repository. This can be either the name of a Git repository stored as a resource in your account, or the URL of a Git repository in Amazon Web Services CodeCommit or in any other Git repository. When you open a notebook instance, it opens in the directory that contains this repository. For more information, see Associating Git Repositories with SageMaker AI Notebook Instances.
additional_code_repositories: An array of up to three Git repositories to associate with the notebook instance. These can be either the names of Git repositories stored as resources in your account, or the URL of Git repositories in Amazon Web Services CodeCommit or in any other Git repository. These repositories are cloned at the same level as the default repository of your notebook instance. For more information, see Associating Git Repositories with SageMaker AI Notebook Instances.
root_access: Whether root access is enabled or disabled for users of the notebook instance. The default value is Enabled. Lifecycle configurations need root access to be able to set up a notebook instance. Because of this, lifecycle configurations associated with a notebook instance always run with root access even if you disable root access for users.
- platform_identifier: The platform identifier of the notebook instance runtime environment.
+ platform_identifier: The platform identifier of the notebook instance runtime environment. The default value is notebook-al2-v2.
instance_metadata_service_configuration: Information on the IMDS configuration of the notebook instance
session: Boto3 session.
region: Region name.
diff --git a/src/sagemaker_core/main/shapes.py b/src/sagemaker_core/main/shapes.py
index 32f5e67..2e51e69 100644
--- a/src/sagemaker_core/main/shapes.py
+++ b/src/sagemaker_core/main/shapes.py
@@ -6377,6 +6377,19 @@ class InferenceComponentComputeResourceRequirements(Base):
max_memory_required_in_mb: Optional[int] = Unassigned()
+class InferenceComponentDataCacheConfig(Base):
+ """
+ InferenceComponentDataCacheConfig
+ Settings that affect how the inference component caches data.
+
+ Attributes
+ ----------------------
+ enable_caching: Sets whether the endpoint that hosts the inference component caches the model artifacts and container image. With caching enabled, the endpoint caches this data in each instance that it provisions for the inference component. That way, the inference component deploys faster during the auto scaling process. If caching isn't enabled, the inference component takes longer to deploy because of the time it spends downloading the data.
+ """
+
+ enable_caching: bool
+
+
class InferenceComponentSpecification(Base):
"""
InferenceComponentSpecification
@@ -6389,6 +6402,7 @@ class InferenceComponentSpecification(Base):
startup_parameters: Settings that take effect while the model container starts up.
compute_resource_requirements: The compute resources allocated to run the model, plus any adapter models, that you assign to the inference component. Omit this parameter if your request is meant to create an adapter inference component. An adapter inference component is loaded by a base inference component, and it uses the compute resources of the base inference component.
base_inference_component_name: The name of an existing inference component that is to contain the inference component that you're creating with your request. Specify this parameter only if your request is meant to create an adapter inference component. An adapter inference component contains the path to an adapter model. The purpose of the adapter model is to tailor the inference output of a base foundation model, which is hosted by the base inference component. The adapter inference component uses the compute resources that you assigned to the base inference component. When you create an adapter inference component, use the Container parameter to specify the location of the adapter artifacts. In the parameter value, use the ArtifactUrl parameter of the InferenceComponentContainerSpecification data type. Before you can create an adapter inference component, you must have an existing inference component that contains the foundation model that you want to adapt.
+ data_cache_config: Settings that affect how the inference component caches data.
"""
model_name: Optional[Union[str, object]] = Unassigned()
@@ -6398,6 +6412,7 @@ class InferenceComponentSpecification(Base):
Unassigned()
)
base_inference_component_name: Optional[str] = Unassigned()
+ data_cache_config: Optional[InferenceComponentDataCacheConfig] = Unassigned()
class InferenceComponentRuntimeConfig(Base):
@@ -7803,7 +7818,7 @@ class ProcessingS3Input(Base):
local_path: The local path in your container where you want Amazon SageMaker to write input data to. LocalPath is an absolute path to the input data and must begin with /opt/ml/processing/. LocalPath is a required parameter when AppManaged is False (default).
s3_data_type: Whether you use an S3Prefix or a ManifestFile for the data type. If you choose S3Prefix, S3Uri identifies a key name prefix. Amazon SageMaker uses all objects with the specified key name prefix for the processing job. If you choose ManifestFile, S3Uri identifies an object that is a manifest file containing a list of object keys that you want Amazon SageMaker to use for the processing job.
s3_input_mode: Whether to use File or Pipe input mode. In File mode, Amazon SageMaker copies the data from the input source onto the local ML storage volume before starting your processing container. This is the most commonly used input mode. In Pipe mode, Amazon SageMaker streams input data from the source directly to your processing container into named pipes without using the ML storage volume.
- s3_data_distribution_type: Whether to distribute the data from Amazon S3 to all processing instances with FullyReplicated, or whether the data from Amazon S3 is shared by Amazon S3 key, downloading one shard of data to each processing instance.
+ s3_data_distribution_type: Whether to distribute the data from Amazon S3 to all processing instances with FullyReplicated, or whether the data from Amazon S3 is sharded by Amazon S3 key, downloading one shard of data to each processing instance.
s3_compression_type: Whether to GZIP-decompress the data in Amazon S3 as it is streamed into the processing container. Gzip can only be used when Pipe mode is specified as the S3InputMode. In Pipe mode, Amazon SageMaker streams input data from the source directly to your container without using the EBS volume.
"""
@@ -9284,6 +9299,19 @@ class InferenceComponentContainerSpecificationSummary(Base):
environment: Optional[Dict[str, str]] = Unassigned()
+class InferenceComponentDataCacheConfigSummary(Base):
+ """
+ InferenceComponentDataCacheConfigSummary
+ Settings that affect how the inference component caches data.
+
+ Attributes
+ ----------------------
+ enable_caching: Indicates whether the inference component caches model artifacts as part of the auto scaling process.
+ """
+
+ enable_caching: bool
+
+
class InferenceComponentSpecificationSummary(Base):
"""
InferenceComponentSpecificationSummary
@@ -9296,6 +9324,7 @@ class InferenceComponentSpecificationSummary(Base):
startup_parameters: Settings that take effect while the model container starts up.
compute_resource_requirements: The compute resources allocated to run the model, plus any adapter models, that you assign to the inference component.
base_inference_component_name: The name of the base inference component that contains this inference component.
+ data_cache_config: Settings that affect how the inference component caches data.
"""
model_name: Optional[Union[str, object]] = Unassigned()
@@ -9305,6 +9334,7 @@ class InferenceComponentSpecificationSummary(Base):
Unassigned()
)
base_inference_component_name: Optional[str] = Unassigned()
+ data_cache_config: Optional[InferenceComponentDataCacheConfigSummary] = Unassigned()
class InferenceComponentRuntimeConfigSummary(Base):