diff --git a/.github/workflows/nvidia_workflow.yml b/.github/workflows/nvidia_workflow.yml
index 7852377d..52eb4a81 100644
--- a/.github/workflows/nvidia_workflow.yml
+++ b/.github/workflows/nvidia_workflow.yml
@@ -18,24 +18,31 @@ jobs:
     container:
       image: nvidia/cuda:12.4.0-devel-ubuntu22.04
     steps:
-      - uses: actions/checkout@v3
+      - uses: actions/checkout@v4
       - name: Setup Python
         uses: actions/setup-python@v5
         with:
          python-version: '3.10'
-
+
       - name: Create input files
-        shell: bash
-        run: |
-          # Extract the payload content without printing it
-          PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)
-
-          # Apply mask to the extracted content
-          echo "::add-mask::$PAYLOAD"
-
-          # Now write to file (won't be logged since it's masked)
-          echo "$PAYLOAD" > payload.json
+        uses: nick-fields/retry@v3
+        with:
+          timeout_minutes: 2
+          max_attempts: 5
+          shell: bash
+          command: |
+            # install jq
+            apt update
+            apt install -y jq
+            # Extract the payload content without printing it
+            PAYLOAD=$(jq -r '.inputs.payload' $GITHUB_EVENT_PATH)
+
+            # Apply mask to the extracted content
+            echo "::add-mask::$PAYLOAD"
+
+            # Now write to file (won't be logged since it's masked)
+            echo "$PAYLOAD" > payload.json
 
       - name: Install uv
         uses: astral-sh/setup-uv@v3
diff --git a/examples/eval.py b/examples/eval.py
index e414a580..37d3eef9 100644
--- a/examples/eval.py
+++ b/examples/eval.py
@@ -17,7 +17,7 @@ except ImportError:
     TestSpec = dict
 
-from reference import check_implementation, generate_input
+from reference import check_implementation, generate_input, ref_kernel
 
 
 class PopcornOutput:
@@ -198,18 +198,21 @@ def run_testing(logger: PopcornOutput, pool: multiprocessing.Pool, tests: list[T
     return 112
 
 
-def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_time_ns: float) -> Stats | Any:
+def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_time_ns: float, is_baseline_run: bool) -> Stats | Any:
     """
     Runs one benchmark. Do not call directly.
     """
-    from submission import custom_kernel
+    if not is_baseline_run:
+        # submission does not exist for a baseline run
+        from submission import custom_kernel
 
     durations = []
     # generate input data once
     data = generate_input(**test.args)
     check_copy = _clone_data(data)
+    active_kernel = ref_kernel if is_baseline_run else custom_kernel
     # first, one obligatory correctness check
-    output = custom_kernel(data)
+    output = active_kernel(data)
     good, message = wrap_check_implementation(check_copy, output)
     if not good:
         return message
@@ -229,12 +232,12 @@ def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_t
             check_copy = _clone_data(data)
         torch.cuda.synchronize()
         start = time.perf_counter_ns()
-        output = custom_kernel(data)
+        output = active_kernel(data)
         torch.cuda.synchronize()
         end = time.perf_counter_ns()
 
         if recheck:
-            good, message = check_implementation(check_copy, output)
+            good, message = wrap_check_implementation(check_copy, output)
             if not good:
                 return message
@@ -249,7 +252,7 @@ def _run_single_benchmark(test: TestCase, recheck: bool, max_repeats: int, max_t
     return calculate_stats(durations)
 
 
-def run_single_benchmark(pool: multiprocessing.Pool, test: TestCase, recheck: bool, max_repeats: int, max_time_ns: float):
+def run_single_benchmark(pool: multiprocessing.Pool, test: TestCase, recheck: bool, max_repeats: int, max_time_ns: float, is_baseline_run: bool = False):
     """
     For a particular test case, check correctness (if applicable) and grab runtime results.
@@ -260,7 +263,7 @@ def run_single_benchmark(pool: multiprocessing.Pool, test: TestCase, recheck: bo
     @param max_time_ns: Timeout time in nanoseconds.
     @return: A Stats object for this particular benchmark case or an error if the test fails.
     """
-    return pool.apply(_run_single_benchmark, (test, recheck, max_repeats, max_time_ns))
+    return pool.apply(_run_single_benchmark, (test, recheck, max_repeats, max_time_ns, is_baseline_run))
 
 
 def run_benchmarking(logger: PopcornOutput, pool: multiprocessing.Pool, tests: list[TestCase]):
@@ -300,13 +303,13 @@ def run_single_profile(test: TestCase) -> str:
     """
     Runs a single test case. Do not call directly
     """
-    from submission import custom_kernel
     from torch.profiler import profile, record_function, ProfilerActivity
+    from submission import custom_kernel
 
     data = generate_input(**test.args)
     torch.cuda.synchronize()
     with profile(activities=[ProfilerActivity.CPU, ProfilerActivity.CUDA]) as prof:
-        submission_output = custom_kernel(_clone_data(data))
+        submission_output = active_kernel(_clone_data(data))
     torch.cuda.synchronize()
     return prof.key_averages().table(sort_by="self_cuda_time_total", row_limit=20)
@@ -327,9 +330,9 @@ def main():
         return 111
 
     if len(sys.argv) < 3:
-        return 2
+        return 222
 
-    mode = sys.argv[1]
+    mode = sys.argv[1].strip()
     seed = os.getenv("POPCORN_SEED")
     os.unsetenv("POPCORN_SEED")
     seed = int(seed) if seed else None
@@ -345,13 +348,14 @@ def main():
         if mode == "benchmark":
             return run_benchmarking(logger, pool, tests)
 
-        if mode == "leaderboard":
+        if (mode == "leaderboard") or (mode == "baseline"):
+            is_baseline_run = mode == "baseline"
             # warmup
-            run_single_benchmark(pool, tests[0], False, 100, 1e7)
+            run_single_benchmark(pool, tests[0], False, 100, 1e7, is_baseline_run)
             logger.log("benchmark-count", len(tests))
 
             passed = True
             for i in range(len(tests)):
-                result = run_single_benchmark(pool, tests[i], True, 100, 30e9)
+                result = run_single_benchmark(pool, tests[i], True, 100, 30e9, is_baseline_run)
                 logger.log(f"benchmark.{i}.spec", tests[i].spec)
 
                 if isinstance(result, Stats):
                     for field in dataclasses.fields(Stats):
@@ -367,7 +371,9 @@ def main():
             run_profiling(logger, tests)
         else:
             # TODO: Implement script mode
-            return 2
+            logger.log(mode, "not implemented")
+            print(f"mode {mode} not implemented")
+            return 333
 
 
 if __name__ == "__main__":
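Note: the eval.py changes boil down to picking one kernel up front and reusing the existing timing loop. A minimal, self-contained sketch of that pattern follows; `ref_kernel` and `custom_kernel` here are toy stand-ins for the real `reference.py`/`submission.py` modules, and the timing is simplified.

```python
# Minimal sketch of the baseline/submission selection pattern used by
# _run_single_benchmark above. The two kernels are stand-ins; the real harness
# imports ref_kernel from reference.py and custom_kernel from submission.py.
import time


def ref_kernel(data):
    return [x * 2 for x in data]


def custom_kernel(data):
    return [x + x for x in data]


def benchmark(data, is_baseline_run: bool, repeats: int = 5) -> float:
    # select the kernel once, then time it with the same loop either way
    active_kernel = ref_kernel if is_baseline_run else custom_kernel
    durations = []
    for _ in range(repeats):
        start = time.perf_counter_ns()
        active_kernel(data)
        durations.append(time.perf_counter_ns() - start)
    return sum(durations) / len(durations)


if __name__ == "__main__":
    print("baseline mean ns:", benchmark(list(range(1024)), is_baseline_run=True))
    print("submission mean ns:", benchmark(list(range(1024)), is_baseline_run=False))
```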
diff --git a/src/discord-cluster-manager/cogs/admin_cog.py b/src/discord-cluster-manager/cogs/admin_cog.py
index 8e39ee2f..a20c9bb3 100644
--- a/src/discord-cluster-manager/cogs/admin_cog.py
+++ b/src/discord-cluster-manager/cogs/admin_cog.py
@@ -10,7 +10,8 @@ import discord
 import env
 import yaml
-from consts import GitHubGPU, ModalGPU
+from cogs.leaderboard_cog import LeaderboardSubmitCog
+from consts import GitHubGPU, ModalGPU, SubmissionMode
 from discord import app_commands
 from discord.ext import commands, tasks
 from leaderboard_db import leaderboard_name_autocomplete
@@ -120,6 +121,10 @@ def __init__(self, bot: "ClusterBot"):
             name="set-forum-ids", description="Sets forum IDs"
         )(self.set_forum_ids)
 
+        self.baseline_run = bot.admin_group.command(
+            name="baseline-run", description="Create a baseline run for a leaderboard"
+        )(self.baseline_run)
+
         self._scheduled_cleanup_temp_users.start()
 
     # --------------------------------------------------------------------------
@@ -1025,3 +1030,58 @@ async def set_forum_ids(self, interaction: discord.Interaction):
             error_message = f"Error updating forum ids: {str(e)}"
             logger.error(error_message, exc_info=True)
             await send_discord_message(interaction, error_message, ephemeral=True)
+
+    # ----------------------------------------------------------------------
+    # Baseline run submission (admin only)
+    # ----------------------------------------------------------------------
+    @discord.app_commands.describe(
+        leaderboard_name="Name of the leaderboard to create a baseline run for",
+        gpu="GPU(s) to use; leave empty for interactive selection",
+        force="Create another baseline run even if one already exists.",
+    )
+    @discord.app_commands.autocomplete(
+        leaderboard_name=leaderboard_name_autocomplete,
+    )
+    @with_error_handling
+    async def baseline_run(
+        self,
+        interaction: discord.Interaction,
+        leaderboard_name: str,
+        gpu: Optional[str] = None,
+        force: bool = False,
+    ):
+        """Admin command to create (or force-create) a baseline run."""
+
+        # Ensure caller is admin
+        is_admin = await self.admin_check(interaction)
+        if not is_admin:
+            await send_discord_message(
+                interaction,
+                "You need Admin permissions to run this command.",
+                ephemeral=True,
+            )
+            return
+
+        # Check for existing baseline run unless forcing
+        if not force:
+            with self.bot.leaderboard_db as db:
+                if db.has_baseline_run(leaderboard_name):
+                    await send_discord_message(
+                        interaction,
+                        (
+                            "A baseline run already exists for this leaderboard. "
+                            "Use the 'force' flag to create another."
+                        ),
+                        ephemeral=True,
+                    )
+                    return
+
+        lb_cog = LeaderboardSubmitCog(self.bot)
+
+        await lb_cog.submit(
+            interaction=interaction,
+            leaderboard_name=leaderboard_name,
+            script=None,
+            mode=SubmissionMode.BASELINE,
+            gpu=gpu,
+        )
diff --git a/src/discord-cluster-manager/cogs/leaderboard_cog.py b/src/discord-cluster-manager/cogs/leaderboard_cog.py
index fadc9c41..354752f7 100644
--- a/src/discord-cluster-manager/cogs/leaderboard_cog.py
+++ b/src/discord-cluster-manager/cogs/leaderboard_cog.py
@@ -5,6 +5,8 @@ import discord
 from consts import (
+    BASELINE_USER,
+    BASELINE_USER_ID,
     SubmissionMode,
     get_gpu_by_name,
 )
@@ -63,28 +65,51 @@ async def on_submit_hook(  # noqa: C901
         self,
         interaction: discord.Interaction,
         leaderboard_name: Optional[str],
-        script: discord.Attachment,
+        script: Optional[discord.Attachment],
         mode: SubmissionMode,
         cmd_gpus: Optional[List[str]],
     ) -> int:
         """
         Called as the main body of a submission to route to the correct runner.
         """
-        # Read the template file
-        submission_content = await script.read()
-        try:
-            submission_content = submission_content.decode()
-        except UnicodeError:
-            await send_discord_message(
-                interaction, "Could not decode your file. Is it UTF-8?", ephemeral=True
-            )
-            return -1
+        if script is None:
+            if mode != SubmissionMode.BASELINE and not script:
+                await send_discord_message(
+                    interaction,
+                    "A script attachment is required unless the submission mode is baseline",
+                    ephemeral=True,
+                )
+                return -1
+            else:
+                submission_content = ""
+        else:
+            # Read the template file
+            submission_content = await script.read()
+
+            try:
+                submission_content = submission_content.decode()
+            except UnicodeError:
+                await send_discord_message(
+                    interaction, "Could not decode your file. Is it UTF-8?", ephemeral=True
+                )
+                return -1
+
+        if mode == SubmissionMode.BASELINE:
+            # create fake baseline submission
+            file_name = None
+            submission_content = None
+            user_id = BASELINE_USER_ID
+            user_name = BASELINE_USER
+        else:
+            file_name = script.filename
+            submission_content = submission_content
+            user_id = interaction.user.id
+            user_name = interaction.user.global_name or interaction.user.name
 
         req = SubmissionRequest(
             code=submission_content,
-            file_name=script.filename,
-            user_id=interaction.user.id,
+            file_name=file_name,
+            user_id=user_id,
             gpus=cmd_gpus,
             leaderboard=leaderboard_name,
         )
@@ -105,26 +130,28 @@ async def on_submit_hook(  # noqa: C901
 
         command = self.bot.get_cog("SubmitCog").submit_leaderboard
 
-        user_name = interaction.user.global_name or interaction.user.name
         # Create a submission entry in the database
         with self.bot.leaderboard_db as db:
             sub_id = db.create_submission(
                 leaderboard=req.leaderboard,
-                file_name=script.filename,
+                file_name=file_name,
                 code=submission_content,
-                user_id=interaction.user.id,
+                user_id=user_id,
                 time=datetime.now(),
                 user_name=user_name,
             )
 
+        if mode == SubmissionMode.BASELINE:
+            run_msg = f"Submission **{sub_id}** is a baseline submission for `{req.leaderboard}`"
+        else:
+            run_msg = f"Submission **{sub_id}**: `{file_name}` for `{req.leaderboard}`"
-        run_msg = f"Submission **{sub_id}**: `{script.filename}` for `{req.leaderboard}`"
         reporter = MultiProgressReporter(interaction, run_msg)
         try:
             tasks = [
                 command(
                     sub_id,
                     submission_content,
-                    script.filename,
+                    file_name,
                     gpu,
                     reporter.add_run(f"{gpu.name} on {gpu.runner}"),
                     req.task,
@@ -140,7 +167,7 @@ async def on_submit_hook(  # noqa: C901
                 command(
                     sub_id,
                     submission_content,
-                    script.filename,
+                    file_name,
                     gpu,
                     reporter.add_run(f"{gpu.name} on {gpu.runner} (secret)"),
                     req.task,
@@ -155,7 +182,7 @@ async def on_submit_hook(  # noqa: C901
         with self.bot.leaderboard_db as db:
             db.mark_submission_done(sub_id)
 
-        if mode == SubmissionMode.LEADERBOARD:
+        if mode == SubmissionMode.LEADERBOARD or mode == SubmissionMode.BASELINE:
             await self.post_submit_hook(interaction, sub_id)
         return sub_id
 
@@ -194,16 +221,15 @@ def generate_run_verdict(self, run: RunItem, sub_data: SubmissionItem):
     async def post_submit_hook(self, interaction: discord.Interaction, sub_id: int):
         with self.bot.leaderboard_db as db:
             sub_data: SubmissionItem = db.get_submission_by_id(sub_id)
-
         result_lines = []
         for run in sub_data["runs"]:
             if (
                 not run["secret"]
-                and run["mode"] == SubmissionMode.LEADERBOARD.value
+                and (run["mode"] == SubmissionMode.LEADERBOARD.value
+                     or run["mode"] == SubmissionMode.BASELINE.value)
                 and run["passed"]
             ):
                 result_lines.append(self.generate_run_verdict(run, sub_data))
-
         if len(result_lines) > 0:
             await send_discord_message(
                 interaction,
@@ -224,10 +250,19 @@ async def submit(
         self,
         interaction: discord.Interaction,
         leaderboard_name: Optional[str],
-        script: discord.Attachment,
+        script: Optional[discord.Attachment],
         mode: SubmissionMode,
         gpu: Optional[str],
     ):
+
+        if not mode == SubmissionMode.BASELINE and not script:
+            await send_discord_message(
+                interaction,
+                "A script attachment is required unless the submission mode is baseline",
+                ephemeral=True,
+            )
+            return
+
         if not self.bot.accepts_jobs:
             await send_discord_message(
                 interaction,
@@ -319,7 +354,6 @@ async def submit_ranked(
             interaction, leaderboard_name, script, mode=SubmissionMode.LEADERBOARD, gpu=gpu
         )
 
-
 async def lang_autocomplete(
     interaction: discord.Interaction,
     current: str,
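Note: a baseline submission arrives without an attachment, so the cog swaps in placeholder metadata before creating the database row. The sketch below condenses that decision; the constants mirror the ones added to consts.py, everything else is illustrative.

```python
# Sketch of how baseline submissions get placeholder metadata while normal
# submissions keep the uploader's identity. Values mirror consts.BASELINE_USER*.
from typing import NamedTuple, Optional

BASELINE_USER = "BASELINE_USER"
BASELINE_USER_ID = -123


class SubmissionMeta(NamedTuple):
    file_name: Optional[str]
    code: Optional[str]
    user_id: int
    user_name: str


def submission_meta(is_baseline: bool, file_name: str = "", code: str = "",
                    user_id: int = 0, user_name: str = "") -> SubmissionMeta:
    if is_baseline:
        # No user file: the runner regenerates the reference kernel itself.
        return SubmissionMeta(None, None, BASELINE_USER_ID, BASELINE_USER)
    return SubmissionMeta(file_name, code, user_id, user_name)


print(submission_meta(True))
print(submission_meta(False, "kernel.py", "print('hi')", 42, "alice"))
```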
Is it UTF-8?", ephemeral=True + ) + return -1 + if mode == SubmissionMode.BASELINE: + # create fake baseline submission + file_name = None + submission_content = None + user_id = BASELINE_USER_ID + user_name = BASELINE_USER + else: + file_name = script.filename + submission_content = submission_content + user_id = interaction.user.id + user_name = interaction.user.global_name or interaction.user.name req = SubmissionRequest( code=submission_content, - file_name=script.filename, - user_id=interaction.user.id, + file_name=file_name, + user_id=user_id, gpus=cmd_gpus, leaderboard=leaderboard_name, ) @@ -105,26 +130,28 @@ async def on_submit_hook( # noqa: C901 command = self.bot.get_cog("SubmitCog").submit_leaderboard - user_name = interaction.user.global_name or interaction.user.name # Create a submission entry in the database with self.bot.leaderboard_db as db: sub_id = db.create_submission( leaderboard=req.leaderboard, - file_name=script.filename, + file_name=file_name, code=submission_content, - user_id=interaction.user.id, + user_id=user_id, time=datetime.now(), user_name=user_name, ) + if mode == SubmissionMode.BASELINE: + run_msg = f"Submission **{sub_id}**: is a baseline submission for `{req.leaderboard}`" + else: + run_msg = f"Submission **{sub_id}**: `{file_name}` for `{req.leaderboard}`" - run_msg = f"Submission **{sub_id}**: `{script.filename}` for `{req.leaderboard}`" reporter = MultiProgressReporter(interaction, run_msg) try: tasks = [ command( sub_id, submission_content, - script.filename, + file_name, gpu, reporter.add_run(f"{gpu.name} on {gpu.runner}"), req.task, @@ -140,7 +167,7 @@ async def on_submit_hook( # noqa: C901 command( sub_id, submission_content, - script.filename, + file_name, gpu, reporter.add_run(f"{gpu.name} on {gpu.runner} (secret)"), req.task, @@ -155,7 +182,7 @@ async def on_submit_hook( # noqa: C901 with self.bot.leaderboard_db as db: db.mark_submission_done(sub_id) - if mode == SubmissionMode.LEADERBOARD: + if mode == SubmissionMode.LEADERBOARD or mode == SubmissionMode.BASELINE: await self.post_submit_hook(interaction, sub_id) return sub_id @@ -194,16 +221,15 @@ def generate_run_verdict(self, run: RunItem, sub_data: SubmissionItem): async def post_submit_hook(self, interaction: discord.Interaction, sub_id: int): with self.bot.leaderboard_db as db: sub_data: SubmissionItem = db.get_submission_by_id(sub_id) - result_lines = [] for run in sub_data["runs"]: if ( not run["secret"] - and run["mode"] == SubmissionMode.LEADERBOARD.value + and (run["mode"] == SubmissionMode.LEADERBOARD.value + or run["mode"] == SubmissionMode.BASELINE.value) and run["passed"] ): result_lines.append(self.generate_run_verdict(run, sub_data)) - if len(result_lines) > 0: await send_discord_message( interaction, @@ -224,10 +250,19 @@ async def submit( self, interaction: discord.Interaction, leaderboard_name: Optional[str], - script: discord.Attachment, + script: Optional[discord.Attachment], mode: SubmissionMode, gpu: Optional[str], ): + + if not mode == SubmissionMode.BASELINE and not script: + await send_discord_message( + interaction, + "Script attachment is required for this unless submission mode is baseline", + ephemeral=True, + ) + return + if not self.bot.accepts_jobs: await send_discord_message( interaction, @@ -319,7 +354,6 @@ async def submit_ranked( interaction, leaderboard_name, script, mode=SubmissionMode.LEADERBOARD, gpu=gpu ) - async def lang_autocomplete( interaction: discord.Interaction, current: str, diff --git a/src/discord-cluster-manager/cogs/submit_cog.py 
diff --git a/src/discord-cluster-manager/consts.py b/src/discord-cluster-manager/consts.py
index 928f59d4..03ada325 100644
--- a/src/discord-cluster-manager/consts.py
+++ b/src/discord-cluster-manager/consts.py
@@ -80,7 +80,7 @@ class SubmissionMode(Enum):
     """
     Different types of submission that can be made:
     Test: Run tests and give detailed results about passed/failed tests. These have short timeouts.
-    Benchmark: Run larger benchmarks. Each benchmark is tested once, and then run multiple times.
+    Benchmark: Run larger benchmarks. Each benchmark is tested once, then run multiple times.
     Profile: Gather profiling information. One selected benchmark is run under the profiler.
     No testing is performed in this mode (sometimes, you need to profile deliberately broken code)
     Leaderboard: Official submission to the leaderboard. This first runs public tests, then a
@@ -97,7 +97,7 @@ class SubmissionMode(Enum):
     LEADERBOARD = "leaderboard"
     PRIVATE = "private"
     SCRIPT = "script"
-
+    BASELINE = "baseline"
 
 class Language(Enum):
     Python = "py"
@@ -157,3 +157,7 @@ class RankCriterion(Enum):
 --index-url https://download.pytorch.org/whl/rocm6.2.4
 torch
 """
+
+# Constants used for baseline runs
+BASELINE_USER = "BASELINE_USER"
+BASELINE_USER_ID = -123
diff --git a/src/discord-cluster-manager/leaderboard_db.py b/src/discord-cluster-manager/leaderboard_db.py
index d48e8404..69a08d7a 100644
--- a/src/discord-cluster-manager/leaderboard_db.py
+++ b/src/discord-cluster-manager/leaderboard_db.py
@@ -6,6 +6,7 @@ import discord
 import psycopg2
+from consts import BASELINE_USER, BASELINE_USER_ID
 from env import (
     DATABASE_URL,
     DISABLE_SSL,
@@ -213,6 +214,11 @@ def create_submission(
         time: datetime.datetime,
         user_name: str = None,
     ) -> Optional[int]:
+        if user_id == BASELINE_USER_ID and user_name == BASELINE_USER:
+            # todo: add reference code to the database
+            code = ""
+            file_name = "reference.py"
+
         try:
             # check if we already have the code
             self.cursor.execute(
@@ -287,6 +293,22 @@ def create_submission(
             self.connection.rollback()  # Ensure rollback if error occurs
             raise KernelBotError("Error during creation of submission") from e
 
+    def has_baseline_run(self, leaderboard_name: str) -> bool:
+        try:
+            self.cursor.execute(
+                """
+                SELECT COUNT(*) FROM leaderboard.runs r
+                JOIN leaderboard.submission s ON r.submission_id = s.id
+                JOIN leaderboard.leaderboard l ON s.leaderboard_id = l.id
+                WHERE l.name = %s AND s.user_id = %s
+                """,
+                (leaderboard_name, str(BASELINE_USER_ID)),
+            )
+            return self.cursor.fetchone()[0] > 0
+        except psycopg2.Error as e:
+            logger.error("Error checking for reference run", exc_info=e)
+            return False
+
     def mark_submission_done(
         self,
         submission: int,
diff --git a/src/discord-cluster-manager/report.py b/src/discord-cluster-manager/report.py
index 4e09c2c5..07e3b517 100644
--- a/src/discord-cluster-manager/report.py
+++ b/src/discord-cluster-manager/report.py
@@ -160,6 +160,7 @@ def make_short_report(runs: dict[str, EvalResult], full=True) -> list[str]:  # n
     Creates a minimalistic report for `runs`,
     returned as a list of status strings
     """
+    any_compile = False
     result = []
 
     for r in runs.values():
@@ -218,6 +219,16 @@ def make_short_report(runs: dict[str, EvalResult], full=True) -> list[str]:  # n
             result.append("✅ Leaderboard run successful")
         elif full:
             result.append("❌ Leaderboard missing")
+
+    if "baseline" in runs:
+        ref_run = runs["baseline"].run
+        if not ref_run.success:
+            result.append("❌ Running baseline failed" + _short_fail_reason(ref_run))
+        elif not ref_run.passed:
+            result.append("❌ Baseline run failed")
+        else:
+            result.append("✅ Baseline run successful")
+
     return result
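Note: `has_baseline_run` is a plain existence check keyed on the baseline user id. The sketch below reproduces the join-and-count shape against an in-memory SQLite database as a stand-in for the real psycopg2 connection and schema.

```python
# Illustration of the existence check behind has_baseline_run, using sqlite3
# and a simplified schema in place of the real PostgreSQL tables.
import sqlite3

BASELINE_USER_ID = -123

conn = sqlite3.connect(":memory:")
conn.executescript(
    """
    CREATE TABLE leaderboard (id INTEGER PRIMARY KEY, name TEXT);
    CREATE TABLE submission (id INTEGER PRIMARY KEY, leaderboard_id INTEGER, user_id TEXT);
    CREATE TABLE runs (id INTEGER PRIMARY KEY, submission_id INTEGER);
    INSERT INTO leaderboard VALUES (1, 'matmul');
    INSERT INTO submission VALUES (10, 1, '-123');
    INSERT INTO runs VALUES (100, 10);
    """
)


def has_baseline_run(leaderboard_name: str) -> bool:
    row = conn.execute(
        """
        SELECT COUNT(*) FROM runs r
        JOIN submission s ON r.submission_id = s.id
        JOIN leaderboard l ON s.leaderboard_id = l.id
        WHERE l.name = ? AND s.user_id = ?
        """,
        (leaderboard_name, str(BASELINE_USER_ID)),
    ).fetchone()
    return row[0] > 0


print(has_baseline_run("matmul"))   # True
print(has_baseline_run("softmax"))  # False
```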
["benchmark", "profile", "leaderboard", "baseline"]: timeout = ranked_timeout if mode == "leaderboard" else benchmark_timeout with tempfile.NamedTemporaryFile("w") as bench_file: if ranking_by == "last": @@ -432,6 +431,7 @@ def run_cuda_script( # # noqa: C901 def run_pytorch_script( # noqa: C901 sources: dict[str, str], main: str, + is_baseline: bool = False, **kwargs, ) -> EvalResult: """ @@ -448,38 +448,39 @@ def run_pytorch_script( # noqa: C901 start = datetime.datetime.now() try: assert main in sources.keys() - - # Write submission files to directory _create_files(sources) # "compile" step: execute the script once. Will populate # `load_inline`'s compile cache, so the actual runs will be faster. - try: - compile_run = run_program(["python", "submission.py"], seed=1, timeout=Timeout.COMPILE) - if "-DTORCH_EXTENSION_NAME" in compile_run.stdout: + comp = None + if not is_baseline: + try: + compile_run = run_program(["python", + "submission.py"], + seed=1, + timeout=Timeout.COMPILE) + if "-DTORCH_EXTENSION_NAME" in compile_run.stdout: + comp = CompileResult( + nvcc_found=True, + nvcc_version="", + success=True, + command=compile_run.command, + stdout=compile_run.stdout, + stderr=compile_run.stderr, + exit_code=compile_run.exit_code, + ) + except subprocess.CalledProcessError as e: + # This step is purely optional, so we just go on + # if it fails comp = CompileResult( - nvcc_found=True, + nvcc_found=False, nvcc_version="", - success=True, - command=compile_run.command, - stdout=compile_run.stdout, - stderr=compile_run.stderr, - exit_code=compile_run.exit_code, + success=False, + command="python submission.py", + stdout=e.stdout, + stderr=e.stderr, + exit_code=e.returncode, ) - else: - comp = None - except subprocess.CalledProcessError as e: - # This step is purely optional, so we just go on - # if it fails - comp = CompileResult( - nvcc_found=False, - nvcc_version="", - success=False, - command="python submission.py", - stdout=e.stdout, - stderr=e.stderr, - exit_code=e.returncode, - ) run = run_single_evaluation(["python", main], **kwargs) @@ -511,7 +512,7 @@ def run_evaluation( require multiple runner calls. 
""" results: dict[str, EvalResult] = {} - if mode in ["test", "benchmark", "profile", "script"]: + if mode in ["test", "benchmark", "profile", "script", "baseline"]: results[mode] = call(mode=mode) elif mode in ["private", "leaderboard"]: # first, run the tests @@ -528,7 +529,7 @@ def run_evaluation( # if they pass, run the leaderboard validation results["leaderboard"] = call(mode="leaderboard") else: - raise AssertionError("Invalid mode") + raise AssertionError(f"Invalid mode: {mode}") return results @@ -544,6 +545,12 @@ def build_test_string(tests: list[dict]): def run_config(config: dict): + mode = config["mode"] + is_baseline = False + if mode == "baseline": + config["sources"].pop("submission.py", None) + is_baseline = True + common_args = { "tests": build_test_string(config.get("tests", [])), "benchmarks": build_test_string(config.get("benchmarks", [])), @@ -558,6 +565,7 @@ def run_config(config: dict): run_pytorch_script, sources=config["sources"], main=config["main"], + is_baseline=is_baseline, **common_args, ) elif config["lang"] == "cu": diff --git a/src/discord-cluster-manager/submission.py b/src/discord-cluster-manager/submission.py index 2777b15f..37c14de6 100644 --- a/src/discord-cluster-manager/submission.py +++ b/src/discord-cluster-manager/submission.py @@ -27,19 +27,31 @@ class ProcessedSubmissionRequest(SubmissionRequest): def prepare_submission(req: SubmissionRequest, lb_db: LeaderboardDB) -> ProcessedSubmissionRequest: - if profanity.contains_profanity(req.file_name): - raise KernelBotError("Please provide a non rude filename") + # Detect reference submissions (no file name & no code provided) + # A reference submission is identified by missing/empty code content (no user file) + is_baseline_submission = not req.code - # check file extension - if not req.file_name.endswith((".py", ".cu", ".cuh", ".cpp")): + # Perform filename/content related checks only for *non* reference submissions + if not is_baseline_submission: + if profanity.contains_profanity(req.file_name): + raise KernelBotError("Please provide a non rude filename") + + # check file extension (if filename provided) + if req.file_name and not req.file_name.endswith((".py", ".cu", ".cuh", ".cpp")): + raise KernelBotError( + "Please provide a Python (.py) or CUDA (.cu / .cuh / .cpp) file", + ) + + # process file directives (GPU selection / leaderboard name) + req = handle_popcorn_directives(req) + + # Ensure leaderboard name is present (might have come from the command directly) + if req.leaderboard is None: raise KernelBotError( - "Please provide a Python (.py) or CUDA (.cu / .cuh / .cpp) file", + "Missing leaderboard name. Either supply one as a command \ + argument or via ``#!POPCORN leaderboard `` directive.", ) - # process file directives - req = handle_popcorn_directives(req) - assert req.leaderboard is not None - leaderboard = lookup_leaderboard(req.leaderboard, lb_db) check_deadline(leaderboard) @@ -117,14 +129,6 @@ def handle_popcorn_directives(req: SubmissionRequest) -> SubmissionRequest: else: req.leaderboard = info["leaderboard"] - if req.leaderboard is None: - raise KernelBotError( - "Missing leaderboard name. 
" - "Either supply one as an argument in the submit command, or " - "specify it in your submission script using the " - "`{#,//}!POPCORN leaderboard ` directive.", - ) - return req diff --git a/src/discord-cluster-manager/task.py b/src/discord-cluster-manager/task.py index 3a14bc51..8641d4f7 100644 --- a/src/discord-cluster-manager/task.py +++ b/src/discord-cluster-manager/task.py @@ -65,6 +65,7 @@ class LeaderboardTask: templates: dict[str, str] = dataclasses.field(default_factory=dict) seed: Optional[int] = None + @staticmethod def from_dict(data: dict): data_ = copy.copy(data) diff --git a/src/discord-cluster-manager/utils.py b/src/discord-cluster-manager/utils.py index c39192f7..d63a44e1 100644 --- a/src/discord-cluster-manager/utils.py +++ b/src/discord-cluster-manager/utils.py @@ -245,7 +245,10 @@ def build_task_config( if lang == "py": config["main"] = "eval.py" - + args = [] + if mode == SubmissionMode.BASELINE: + submission_content = "" + config["args"] = args return { **config, "sources": { @@ -259,7 +262,6 @@ def build_task_config( all_files[n] = submission_content else: all_files[n] = c - common = { "lang": task.lang.value, "arch": arch,