gpu-mode · S1ro1 · Oct 13, 2025 · Oct 13, 2025 · Oct 13, 2025 · Nov 1, 2025
diff --git a/.github/workflows/nvidia-arc-health.yml b/.github/workflows/nvidia-arc-health.yml
@@ -6,27 +6,15 @@ on:
     - cron: '0 2 * * *'
   workflow_dispatch:
   push:
-    branches: [main]
 
 jobs:
   health-check:
-    runs-on: [gpumode-nvidia-arc]
+    runs-on: [nvidia-docker-b200-8-x86-64]
     timeout-minutes: 5
-    container:
-      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
 
     steps:
-    - name: Setup Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.10'
-
-    - name: Install PyTorch
-      run: |
-        pip install torch
-
     - name: GPU Health Check
-      run: python -c "import torch; torch.randn(5, device='cuda')"
+      run: python3 -c "import torch; torch.randn(5, device='cuda')"
 
     env:
       CUDA_VISIBLE_DEVICES: 0
diff --git a/.github/workflows/nvidia_workflow.yml b/.github/workflows/nvidia_workflow.yml
@@ -19,22 +19,20 @@ run-name: 'NVIDIA Job - ${{ github.event.inputs.run_id }}'
 
 jobs:
   run:
-    runs-on: [gpumode-nvidia-arc]
+    runs-on: [nvidia-docker-b200-8-x86-64]
     timeout-minutes: 10
-    container:
-      image: nvidia/cuda:12.4.0-devel-ubuntu22.04
     steps:
     - uses: actions/checkout@v3
 
-    - name: Setup Python
-      uses: actions/setup-python@v5
-      with:
-        python-version: '3.10'
-
-    - name: Install uv
-      uses: astral-sh/setup-uv@v3
-      with:
-        version: "latest"
+    - name: nvidia-smi  # best-effort GPU diagnostic
+      shell: bash
+      run: |
+        nvidia-smi || echo "nvidia-smi failed"  # fallback echo keeps the step green
+
+    - name: ncu  # best-effort profiler availability check
+      shell: bash
+      run: |
+        ncu --version || echo "ncu failed"  # fallback echo keeps the step green
 
     - name: Create input files
       shell: bash
@@ -49,30 +47,18 @@ jobs:
         # Now write to file (won't be logged since it's masked)
         echo "$PAYLOAD" > payload.json
 
-    - name: Install uv
-      uses: astral-sh/setup-uv@v3
-      with:
-        version: "latest"
-
-    - name: Setup Python environment
+    - name: Setup Virtual Environment and Install Dependencies
       shell: bash
       run: |
-        uv venv .venv
-        echo "VIRTUAL_ENV=$PWD/.venv" >> $GITHUB_ENV
-        echo "$PWD/.venv/bin" >> $GITHUB_PATH
+        python3 -m pip install --upgrade pip  # use the pip of the interpreter that runs the script
+        python3 -m pip install -r "requirements.txt"
+        python3 -m pip install -e .
 
-        if [[ -n "${{ github.event.inputs.requirements }}" ]]; then
-          cat > "requirements.txt" <<'EOL'
-          ${{ github.event.inputs.requirements }}
-        EOL
-        uv pip install -r "requirements.txt"
-        fi
-        uv pip install -e .
 
     - name: Run script
       shell: bash
       run: |
-        python src/runners/github-runner.py
+        python3 src/runners/github-runner.py
 
     - name: Upload training artifacts
       uses: actions/upload-artifact@v4
@@ -88,5 +74,3 @@ jobs:
         name: profile-data
         path: profile_data/*
         retention-days: 1
-    env:
-      CUDA_VISIBLE_DEVICES: 0
diff --git a/src/libkernelbot/run_eval.py b/src/libkernelbot/run_eval.py
@@ -22,7 +22,7 @@ class ProfileResult:
     # Public download URL of all files created by the profiler
     # This may also be configured later
     download_url: Optional[str]
-    #fmt: on
+    # fmt: on
 
 
 @dataclasses.dataclass
@@ -351,9 +351,15 @@ def profile_program(
             "--",
         ] + call
 
-        run_result = run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu, extra_env={
-            "GPU_DUMP_CODE_OBJECT": "1",
-        })
+        run_result = run_program(
+            call,
+            seed=seed,
+            timeout=timeout,
+            multi_gpu=multi_gpu,
+            extra_env={
+                "GPU_DUMP_CODE_OBJECT": "1",
+            },
+        )
 
         profile_result = None
 
@@ -377,7 +383,7 @@ def profile_program(
                 code_obj.rename(output_dir / code_obj.name)
 
             profile_result = ProfileResult(
-                profiler='rocPROF',
+                profiler="rocPROF",
                 download_url=None,
             )
 
@@ -386,6 +392,7 @@ def profile_program(
         # TODO: Implement profiling for other platforms
         return run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu), None
 
+
 def run_single_evaluation(
     system: SystemInfo,
     call: list[str],
@@ -427,7 +434,7 @@ def run_single_evaluation(
         return run_program(call, seed=seed, timeout=timeout, multi_gpu=multi_gpu), None
 
 
-def make_system_info() -> SystemInfo: # noqa: C901
+def make_system_info() -> SystemInfo:  # noqa: C901
     info = SystemInfo()
     try:
         import torch
@@ -448,14 +455,16 @@ def make_system_info() -> SystemInfo: # noqa: C901
             info.gpu = subprocess.check_output(
                 ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"], encoding="utf-8"
             )
-            info.device_count = info.gpu.count('\n')
+            info.device_count = info.gpu.count("\n")
             info.runtime = "CUDA"
         except subprocess.CalledProcessError:
             # try again for HIP
             try:
-                rocm_info = json.loads(subprocess.check_output(
-                    ["rocm-smi", "--showproductname", "--json"], encoding="utf-8"
-                ))
+                rocm_info = json.loads(
+                    subprocess.check_output(
+                        ["rocm-smi", "--showproductname", "--json"], encoding="utf-8"
+                    )
+                )
                 if len(rocm_info) > 0:
                     info.gpu = next(rocm_info.__iter__())["Card Series"]
 
@@ -587,7 +596,7 @@ def run_pytorch_script(  # noqa: C901
         # "compile" step: execute the script once. Will populate
         # `load_inline`'s compile cache, so the actual runs will be faster.
         try:
-            compile_run = run_program(["python", "submission.py"], seed=1, timeout=Timeout.COMPILE)
+            compile_run = run_program(["python3", "submission.py"], seed=1, timeout=Timeout.COMPILE)
             if "-DTORCH_EXTENSION_NAME" in compile_run.stdout:
                 comp = CompileResult(
                     nvcc_found=True,
@@ -613,7 +622,7 @@ def run_pytorch_script(  # noqa: C901
                 exit_code=e.returncode,
             )
 
-        run, profile = run_single_evaluation(system, ["python", main], **kwargs)
+        run, profile = run_single_evaluation(system, ["python3", main], **kwargs)
 
         return EvalResult(
             start=start,