async def generate_ptx_code(
    self, source_code: str, gpu_type: str, include_sass: bool = False
) -> tuple[bool, str]:
    """
    Generate PTX code for a CUDA submission.

    The source is written to ``submission.cu`` inside a temporary directory
    and compiled with ``nvcc -ptx``. The compiler runs in a worker thread so
    the event loop is not blocked while it works.

    Args:
        source_code (str): The CUDA source code
        gpu_type (str): The GPU architecture to target (a key of GPU_TO_SM)
        include_sass (bool): Whether to pass ``-Xptxas=-v`` and prepend the
            compiler's stderr output to the result

    Returns:
        tuple[bool, str]: Success status and the PTX output or error message
    """
    # Function-scope imports keep this block self-contained.
    import asyncio
    import shutil

    # Get the SM architecture code for the specified GPU type
    arch = GPU_TO_SM.get(gpu_type)
    if not arch:
        return False, f"Unknown GPU type: {gpu_type}. Available types: {', '.join(GPU_TO_SM.keys())}"

    # Look nvcc up on PATH before doing any file work; no subprocess needed.
    if shutil.which("nvcc") is None:
        return False, "NVCC (CUDA compiler) not found. Is CUDA installed?"

    try:
        # Everything lives in a temporary directory that is removed on exit
        with tempfile.TemporaryDirectory() as temp_dir:
            temp_path = Path(temp_dir)
            source_file = temp_path / "submission.cu"
            ptx_file = temp_path / "output.ptx"

            source_file.write_text(source_code, encoding="utf-8")

            # Base flags plus the PTX output flag
            ptx_flags = [*CUDA_FLAGS, "-ptx"]
            if include_sass:
                # NOTE(review): -Xptxas only takes effect when ptxas runs; with
                # -ptx nvcc presumably stops before that stage, so this may
                # yield no SASS information - confirm against the nvcc docs.
                ptx_flags.append("-Xptxas=-v")

            arch_flag = f"-gencode=arch=compute_{arch},code=compute_{arch}"
            command = ["nvcc", *ptx_flags, str(source_file), arch_flag, "-o", str(ptx_file)]

            # subprocess.run blocks; run it off the event loop thread.
            process = await asyncio.to_thread(
                subprocess.run, command, capture_output=True, text=True
            )

            if process.returncode != 0:
                # Compilation failed
                return False, f"PTX generation failed:\n{process.stderr}"

            if not ptx_file.exists():
                return False, "PTX file was not generated"

            separator = "-" * 40 + "\n"
            sections = []

            # Compiler stderr is where the verbose ptxas output would appear
            if include_sass and process.stderr:
                sections.append("SASS Assembly Information:\n")
                sections.append(separator)
                sections.append(process.stderr + "\n")
                sections.append(separator + "\n")

            sections.append("PTX Code:\n")
            sections.append(separator)
            sections.append(ptx_file.read_text(encoding="utf-8"))
            return True, "".join(sections)
    except Exception as e:
        # Report any unexpected failure to the caller instead of raising
        return False, f"Error generating PTX: {str(e)}"

@app_commands.command(name="ptx")
@app_commands.describe(
    submission="The CUDA submission file (.cu extension)",
    gpu_type="The GPU architecture to target",
    include_sass="Whether to include SASS/assembly output",
    as_file="Return the PTX code as a downloadable file instead of text messages",
)
@app_commands.choices(
    gpu_type=[
        Choice(name=gpu, value=gpu) for gpu in GPU_TO_SM.keys()
    ]
)
@with_error_handling
async def ptx_command(self, interaction: discord.Interaction,
                      submission: discord.Attachment,
                      gpu_type: Choice[str] = None,
                      include_sass: bool = False,
                      as_file: bool = False):
    """
    Generate PTX code from a CUDA submission.

    Parameters
    ------------
    submission: File
        The CUDA submission file (.cu extension)
    gpu_type: Choice[str]
        The GPU architecture to target
    include_sass: bool
        Whether to include SASS assembly code in the output
    as_file: bool
        Return the PTX code as a downloadable file instead of text messages
    """
    # Results are posted to a new public thread; the interaction itself only
    # receives a short completion or error notice.
    if not interaction.response.is_done():
        await interaction.response.defer()

    # Validate the file extension
    if not submission.filename.endswith('.cu'):
        await send_discord_message(
            interaction, "❌ Only .cu file extensions are supported for PTX generation"
        )
        return

    # Set default GPU type to T4 if not specified
    target_gpu = gpu_type.value if gpu_type else "T4"

    # Slightly less than Discord's 2000-character limit to leave room for markdown
    max_msg_length = 1900

    def split_chunks(text: str) -> list[str]:
        """Split text into pieces that each fit into one Discord message."""
        return [text[i:i + max_msg_length] for i in range(0, len(text), max_msg_length)]

    try:
        # Read the submission file
        content = await submission.read()
        source_code = content.decode('utf-8')

        # Create a thread for the PTX generation
        thread_name = f"PTX Generation - {submission.filename} - {target_gpu}"
        if include_sass:
            thread_name += " with SASS"

        # Thread names are capped at 100 characters and the filename is
        # user-controlled, so truncate rather than let the API call fail.
        thread = await interaction.channel.create_thread(
            name=thread_name[:100],
            type=discord.ChannelType.public_thread,
        )

        await thread.send(
            f"Generating PTX code for {submission.filename} targeting {target_gpu}..."
            + (" (including SASS output)" if include_sass else "")
        )

        # Generate the PTX code
        success, result = await self.generate_ptx_code(source_code, target_gpu, include_sass)

        if success:
            if as_file:
                # Create a temporary file containing the PTX output
                with tempfile.NamedTemporaryFile(
                    'w', suffix='.ptx', delete=False, encoding='utf-8'
                ) as temp_file:
                    temp_file.write(result)
                    temp_file_path = temp_file.name

                try:
                    # Get the base filename without extension
                    base_filename = Path(submission.filename).stem
                    output_filename = f"{base_filename}_{target_gpu}.ptx"

                    # Send the file
                    await thread.send(
                        f"PTX code for {submission.filename} targeting {target_gpu}:",
                        file=discord.File(temp_file_path, filename=output_filename)
                    )
                finally:
                    # Remove the temporary file even when sending fails
                    Path(temp_file_path).unlink(missing_ok=True)
            else:
                # Split the PTX code into chunks if it's too long for Discord
                for chunk in split_chunks(result):
                    await thread.send(f"```{chunk}```")

            # Send a summary message
            await thread.send(
                f"✅ PTX code generation complete for {target_gpu} GPU"
                + (" with SASS assembly" if include_sass else "")
            )
        else:
            # Compiler errors can exceed the message limit, so the error is
            # chunked as well; short errors still arrive as a single message.
            for chunk in split_chunks(f"❌ Failed to generate PTX code: {result}"):
                await thread.send(chunk)

        # Notify user in the original channel
        await send_discord_message(
            interaction,
            f"PTX generation for {submission.filename} is complete. Check the thread for results."
        )

    except Exception as e:
        logger.error(f"Error generating PTX: {e}", exc_info=True)
        await send_discord_message(interaction, f"❌ Error generating PTX: {str(e)}")