Skip to content

Commit 38da6bc

Browse files
committed
Add support for AMD GPU via --gpus=amd for docker linux amd64.
1 parent a46a49b commit 38da6bc

File tree

34 files changed

+517
-31
lines changed

34 files changed

+517
-31
lines changed
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
name: "update-amd-gpu-device-plugin-version"
2+
on:
3+
workflow_dispatch:
4+
schedule:
5+
# every Monday at around 3 am pacific/10 am UTC
6+
- cron: "0 10 * * 1"
7+
env:
8+
GOPROXY: https://proxy.golang.org
9+
GO_VERSION: '1.23.0'
10+
permissions:
11+
contents: read
12+
13+
jobs:
14+
bump-amd-gpu-device-plugin-version:
15+
runs-on: ubuntu-22.04
16+
steps:
17+
- uses: actions/checkout@d632683dd7b4114ad314bca15554477dd762a938
18+
- uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32
19+
with:
20+
go-version: ${{env.GO_VERSION}}
21+
- name: Bump amd-gpu-device-plugin version
22+
id: bumpAmdDevicePlugin
23+
run: |
24+
echo "OLD_VERSION=$(DEP=amd-gpu-device-plugin make get-dependency-version)" >> "$GITHUB_OUTPUT"
25+
make update-amd-gpu-device-plugin-version
26+
echo "NEW_VERSION=$(DEP=amd-gpu-device-plugin make get-dependency-version)" >> "$GITHUB_OUTPUT"
27+
# The following is to support multiline with GITHUB_OUTPUT, see https://docs.github.com/en/actions/using-workflows/workflow-commands-for-github-actions#multiline-strings
28+
echo "changes<<EOF" >> "$GITHUB_OUTPUT"
29+
echo "$(git status --porcelain)" >> "$GITHUB_OUTPUT"
30+
echo "EOF" >> "$GITHUB_OUTPUT"
31+
- name: Create PR
32+
if: ${{ steps.bumpAmdDevicePlugin.outputs.changes != '' }}
33+
uses: peter-evans/create-pull-request@5e914681df9dc83aa4e4905692ca88beb2f9e91f
34+
with:
35+
token: ${{ secrets.MINIKUBE_BOT_PAT }}
36+
commit-message: 'Addon amd-gpu-device-plugin: Update amd/k8s-device-plugin image from ${{ steps.bumpAmdDevicePlugin.outputs.OLD_VERSION }} to ${{ steps.bumpAmdDevicePlugin.outputs.NEW_VERSION }}'
37+
committer: minikube-bot <[email protected]>
38+
author: minikube-bot <[email protected]>
39+
branch: auto_bump_amd_device_plugin_version
40+
push-to-fork: minikube-bot/minikube
41+
base: master
42+
delete-branch: true
43+
title: 'Addon amd-gpu-device-plugin: Update amd/k8s-device-plugin image from ${{ steps.bumpAmdDevicePlugin.outputs.OLD_VERSION }} to ${{ steps.bumpAmdDevicePlugin.outputs.NEW_VERSION }}'
44+
labels: ok-to-test
45+
body: |
46+
The [k8s-device-plugin](https://github.com/ROCm/k8s-device-plugin) project released a new k8s-device-plugin image
47+
48+
This PR was auto-generated by `make update-amd-gpu-device-plugin-version` using [update-amd-gpu-device-plugin-version.yml](https://github.com/kubernetes/minikube/tree/master/.github/workflows/update-amd-gpu-device-plugin-version.yml) CI Workflow.

Makefile

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1222,6 +1222,11 @@ update-nvidia-device-plugin-version:
12221222
(cd hack/update/nvidia_device_plugin_version && \
12231223
go run update_nvidia_device_plugin_version.go)
12241224

1225+
.PHONY: update-amd-gpu-device-plugin-version
1226+
update-amd-gpu-device-plugin-version:
1227+
(cd hack/update/amd_device_plugin_version && \
1228+
go run update_amd_device_plugin_version.go)
1229+
12251230
.PHONY: update-nerdctld-version
12261231
update-nerdctld-version:
12271232
(cd hack/update/nerdctld_version && \

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ As well as developer-friendly features:
3535

3636
* [Addons](https://minikube.sigs.k8s.io/docs/handbook/deploying/#addons) - a marketplace for developers to share configurations for running services on minikube
3737
* [NVIDIA GPU support](https://minikube.sigs.k8s.io/docs/tutorials/nvidia/) - for machine learning
38+
* [AMD GPU support](https://minikube.sigs.k8s.io/docs/tutorials/amd/) - for machine learning
3839
* [Filesystem mounts](https://minikube.sigs.k8s.io/docs/handbook/mount/)
3940

4041
**For more information, see the official [minikube website](https://minikube.sigs.k8s.io)**

cmd/minikube/cmd/start.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,8 +1462,8 @@ func validateGPUs(value, drvName, rtime string) error {
14621462
if err := validateGPUsArch(); err != nil {
14631463
return err
14641464
}
1465-
if value != "nvidia" && value != "all" {
1466-
return errors.Errorf(`The gpus flag must be passed a value of "nvidia" or "all"`)
1465+
if value != "nvidia" && value != "all" && value != "amd" {
1466+
return errors.Errorf(`The gpus flag must be passed a value of "nvidia", "amd" or "all"`)
14671467
}
14681468
if drvName == constants.Docker && (rtime == constants.Docker || rtime == constants.DefaultContainerRuntime) {
14691469
return nil

cmd/minikube/cmd/start_flags.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ func initMinikubeFlags() {
206206
startCmd.Flags().Bool(disableOptimizations, false, "If set, disables optimizations that are set for local Kubernetes. Including decreasing CoreDNS replicas from 2 to 1. Defaults to false.")
207207
startCmd.Flags().Bool(disableMetrics, false, "If set, disables metrics reporting (CPU and memory usage), this can improve CPU usage. Defaults to false.")
208208
startCmd.Flags().String(staticIP, "", "Set a static IP for the minikube cluster, the IP must be: private, IPv4, and the last octet must be between 2 and 254, for example 192.168.200.200 (Docker and Podman drivers only)")
209-
startCmd.Flags().StringP(gpus, "g", "", "Allow pods to use your NVIDIA GPUs. Options include: [all,nvidia] (Docker driver with Docker container-runtime only)")
209+
startCmd.Flags().StringP(gpus, "g", "", "Allow pods to use your GPUs. Options include: [all,nvidia,amd] (Docker driver with Docker container-runtime only)")
210210
startCmd.Flags().Duration(autoPauseInterval, time.Minute*1, "Duration of inactivity before the minikube VM is paused (default 1m0s)")
211211
}
212212

cmd/minikube/cmd/start_test.go

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -814,7 +814,10 @@ func TestValidateGPUs(t *testing.T) {
814814
{"nvidia", "docker", "", ""},
815815
{"all", "kvm", "docker", "The gpus flag can only be used with the docker driver and docker container-runtime"},
816816
{"nvidia", "docker", "containerd", "The gpus flag can only be used with the docker driver and docker container-runtime"},
817-
{"cat", "docker", "docker", `The gpus flag must be passed a value of "nvidia" or "all"`},
817+
{"cat", "docker", "docker", `The gpus flag must be passed a value of "nvidia", "amd" or "all"`},
818+
{"amd", "docker", "docker", ""},
819+
{"amd", "docker", "", ""},
820+
{"amd", "docker", "containerd", "The gpus flag can only be used with the docker driver and docker container-runtime"},
818821
}
819822

820823
for _, tc := range tests {

deploy/addons/assets.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,10 @@ var (
107107
//go:embed gpu/nvidia-gpu-device-plugin.yaml.tmpl
108108
NvidiaGpuDevicePluginAssets embed.FS
109109

110+
// AmdGpuDevicePluginAssets assets for amd-gpu-device-plugin addon
111+
//go:embed gpu/amd-gpu-device-plugin.yaml.tmpl
112+
AmdGpuDevicePluginAssets embed.FS
113+
110114
// LogviewerAssets assets for logviewer addon
111115
//go:embed logviewer/*.tmpl logviewer/*.yaml
112116
LogviewerAssets embed.FS
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
# Copyright 2024 The Kubernetes Authors All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
apiVersion: apps/v1
16+
kind: DaemonSet
17+
metadata:
18+
name: amd-gpu-device-plugin
19+
namespace: kube-system
20+
labels:
21+
k8s-app: amd-gpu-device-plugin
22+
kubernetes.io/minikube-addons: amd-gpu-device-plugin
23+
addonmanager.kubernetes.io/mode: Reconcile
24+
spec:
25+
selector:
26+
matchLabels:
27+
k8s-app: amd-gpu-device-plugin
28+
template:
29+
metadata:
30+
labels:
31+
name: amd-gpu-device-plugin
32+
k8s-app: amd-gpu-device-plugin
33+
spec:
34+
nodeSelector:
35+
kubernetes.io/arch: amd64
36+
priorityClassName: system-node-critical
37+
tolerations:
38+
- key: CriticalAddonsOnly
39+
operator: Exists
40+
volumes:
41+
- name: dp
42+
hostPath:
43+
path: /var/lib/kubelet/device-plugins
44+
- name: sys
45+
hostPath:
46+
path: /sys
47+
containers:
48+
- image: {{.CustomRegistries.AmdDevicePlugin | default .ImageRepository | default .Registries.AmdDevicePlugin }}{{.Images.AmdDevicePlugin}}
49+
name: amd-gpu-device-plugin
50+
securityContext:
51+
allowPrivilegeEscalation: false
52+
capabilities:
53+
drop: ["ALL"]
54+
volumeMounts:
55+
- name: dp
56+
mountPath: /var/lib/kubelet/device-plugins
57+
- name: sys
58+
mountPath: /sys
59+
updateStrategy:
60+
type: RollingUpdate
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
/*
2+
Copyright 2024 The Kubernetes Authors All rights reserved.
3+
4+
Licensed under the Apache License, Version 2.0 (the "License");
5+
you may not use this file except in compliance with the License.
6+
You may obtain a copy of the License at
7+
8+
http://www.apache.org/licenses/LICENSE-2.0
9+
10+
Unless required by applicable law or agreed to in writing, software
11+
distributed under the License is distributed on an "AS IS" BASIS,
12+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
See the License for the specific language governing permissions and
14+
limitations under the License.
15+
*/
16+
17+
package main
18+
19+
import (
20+
"context"
21+
"fmt"
22+
"time"
23+
24+
"k8s.io/klog/v2"
25+
"k8s.io/minikube/hack/update"
26+
)
27+
28+
// schema lists, per target file, the replacement rules that main hands to
// update.Apply. The only entry targets pkg/minikube/assets/addons.go.
// NOTE(review): the Replace key looks like a regular expression matching the
// current rocm/k8s-device-plugin image reference, and the value a template
// filled from Data ({{.Version}}, {{.SHA}}) — confirm against the
// hack/update package.
var schema = map[string]update.Item{
29+
"pkg/minikube/assets/addons.go": {
30+
Replace: map[string]string{
31+
// The replacement re-emits a trailing `",` — presumably because `.*`
// swallows the rest of the matched line; verify against addons.go.
`rocm/k8s-device-plugin:.*`: `rocm/k8s-device-plugin:{{.Version}}@{{.SHA}}",`,
32+
},
33+
},
34+
}
35+
36+
// Data holds the values referenced by the replacement template in schema.
type Data struct {
37+
// Version is the release tag; main sets it from stable.Tag.
Version string
38+
// SHA is the image digest string returned by update.GetImageSHA in main.
SHA string
39+
}
40+
41+
// main looks up the stable release of ROCm/k8s-device-plugin, resolves the
// SHA for the matching rocm/k8s-device-plugin image tag, and applies schema
// with those values. Either lookup failing ends the program via klog.Fatalf.
func main() {
42+
// The five-minute timeout applies to ctx, which is passed to GHReleases only.
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Minute)
43+
defer cancel()
44+
45+
// Only the first result (stable) is used; the second and third are discarded.
stable, _, _, err := update.GHReleases(ctx, "ROCm", "k8s-device-plugin")
46+
if err != nil {
47+
klog.Fatalf("Unable to get stable version: %v", err)
48+
}
49+
// The image reference is built from the release tag as-is.
sha, err := update.GetImageSHA(fmt.Sprintf("rocm/k8s-device-plugin:%s", stable.Tag))
50+
if err != nil {
51+
klog.Fatalf("failed to get image SHA: %v", err)
52+
}
53+
54+
data := Data{Version: stable.Tag, SHA: sha}
55+
56+
update.Apply(schema, data)
57+
}

hack/update/get_version/get_version.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ type dependency struct {
3333
}
3434

3535
var dependencies = map[string]dependency{
36+
"amd-gpu-device-plugin": {addonsFile, `rocm/k8s-device-plugin:(.*)@`},
3637
"buildkit": {"deploy/iso/minikube-iso/arch/x86_64/package/buildkit-bin/buildkit-bin.mk", `BUILDKIT_BIN_VERSION = (.*)`},
3738
"calico": {"pkg/minikube/bootstrapper/images/images.go", `calicoVersion = "(.*)"`},
3839
"cilium": {"pkg/minikube/cni/cilium.yaml", `quay.io/cilium/cilium:(.*)@`},

0 commit comments

Comments
 (0)