Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion examples/identity_py/submission.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!POPCORN leaderboard identity_py
#!POPCORN leaderboard identity_py-dev

from task import input_t, output_t

Expand Down
3 changes: 2 additions & 1 deletion src/discord-cluster-manager/bot.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
POSTGRES_USER,
init_environment,
)
from launchers import GitHubLauncher, ModalLauncher
from launchers import GitHubLauncher, ModalLauncher, GenericLauncher
from leaderboard_db import LeaderboardDB
from utils import setup_logging

Expand Down Expand Up @@ -80,6 +80,7 @@ async def setup_hook(self):
submit_cog = SubmitCog(self)
submit_cog.register_launcher(ModalLauncher(consts.MODAL_CUDA_INCLUDE_DIRS))
submit_cog.register_launcher(GitHubLauncher(env.GITHUB_REPO, env.GITHUB_TOKEN))
submit_cog.register_launcher(GenericLauncher("http://65.108.32.167:8000/run", token='TOKEN'))
await self.add_cog(submit_cog)
await self.add_cog(BotManagerCog(self))
await self.add_cog(LeaderboardCog(self))
Expand Down
3 changes: 2 additions & 1 deletion src/discord-cluster-manager/cogs/admin_cog.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
import discord
import env
import yaml
from consts import GitHubGPU, ModalGPU
from consts import GitHubGPU, ModalGPU, OtherGPU
from discord import app_commands
from discord.ext import commands, tasks
from leaderboard_db import leaderboard_name_autocomplete
Expand Down Expand Up @@ -153,6 +153,7 @@ async def is_creator_check(
@app_commands.choices(
gpu=[app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in GitHubGPU]
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in ModalGPU]
+ [app_commands.Choice(name=gpu.name, value=gpu.value) for gpu in OtherGPU]
)
@with_error_handling
async def leaderboard_create_local(
Expand Down
7 changes: 6 additions & 1 deletion src/discord-cluster-manager/consts.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ class ModalGPU(Enum):
B200 = "B200"


class OtherGPU(Enum):
    """GPU types that are neither Modal nor GitHub runners."""

    A6000 = "A6000"


@dataclasses.dataclass
class GPU:
name: str
Expand All @@ -48,7 +52,7 @@ def _make_gpu_lookup(runner_map: dict[str, Type[Enum]]):
return lookup


_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU})
_GPU_LOOKUP = _make_gpu_lookup({"Modal": ModalGPU, "GitHub": GitHubGPU, "Generic": OtherGPU})


def get_gpu_by_name(name: str) -> GPU:
Expand Down Expand Up @@ -114,6 +118,7 @@ class RankCriterion(Enum):
"T4": "75",
"L4": "80",
"A100": "80",
"A6000": "86",
"H100": "90a",
"B200": "100",
"NVIDIA": None,
Expand Down
2 changes: 1 addition & 1 deletion src/discord-cluster-manager/launchers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .github import GitHubLauncher
from .launcher import Launcher
from .modal import ModalLauncher

from .generic import GenericLauncher
# __all__ must hold the exported names as strings; listing the class objects
# themselves makes `from launchers import *` fail with a TypeError.
__all__ = ["Launcher", "GitHubLauncher", "ModalLauncher", "GenericLauncher"]
63 changes: 63 additions & 0 deletions src/discord-cluster-manager/launchers/generic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# Generic launcher POSTs to a specific URL
import asyncio
import datetime
import json

import requests

from consts import GPU, OtherGPU
from report import RunProgressReporter
from run_eval import FullResult, CompileResult, RunResult, EvalResult, SystemInfo
from utils import setup_logging, KernelBotError

from .launcher import Launcher

logger = setup_logging(__name__)


class GenericLauncher(Launcher):
    """Launcher that hands a submission to a remote runner over HTTP.

    The run config is POSTed as JSON to ``url`` together with ``token``, and
    the JSON reply is decoded back into a ``FullResult``.
    """

    def __init__(self, url: str, token: str):
        """Store the endpoint ``url`` and the ``token`` sent with every request."""
        super().__init__("Generic", gpus=OtherGPU)
        self.url = url
        self.token = token

    async def run_submission(
        self, config: dict, gpu_type: GPU, status: RunProgressReporter
    ) -> FullResult:
        """POST ``config`` to the runner and return the decoded result.

        Args:
            config: Run configuration, forwarded verbatim in the request body.
            gpu_type: Not used by this launcher.
            status: Reporter that receives the progress messages.

        Returns:
            A ``FullResult`` built from the ``runs`` and ``system`` entries of
            the reply.

        Raises:
            KernelBotError: If the request cannot be completed, the reply
                status is not 200, or the reply body cannot be decoded.
        """
        logger.info(f"Calling {self.url}")

        await status.push("⏳ Waiting for run to finish...")
        payload = {"config": config, "token": self.token}
        try:
            # requests is blocking, so the call runs in a worker thread
            # instead of stalling the event loop.
            response = await asyncio.to_thread(requests.post, self.url, json=payload)
        except requests.RequestException as err:
            logger.error("Error running submission. Request to %s failed: %s", self.url, err)
            raise KernelBotError("Error running submission. Request failed") from err

        # Log the raw reply at debug level rather than printing it to stdout.
        logger.debug("Runner response: %s", response.text)

        await status.update("✅ Waiting for run to finish... Done")
        if response.status_code != 200:
            logger.error(
                "Error running submission. Status code %d, Message: %s",
                response.status_code,
                response.text,
            )
            raise KernelBotError(
                f"Error running submission. Status code {response.status_code}"
            )

        try:
            return self._decode_result(response.json())
        except (KeyError, TypeError, ValueError) as err:
            # Invalid JSON, missing keys, unexpected dataclass fields or bad
            # timestamps all mean the reply does not have the expected shape.
            logger.error("Error running submission. Malformed response: %r", err)
            raise KernelBotError("Error running submission. Malformed response") from err

    @staticmethod
    def _decode_result(data: dict) -> FullResult:
        """Rebuild a ``FullResult`` from the runner's decoded JSON reply."""
        # TODO: this code is duplicated :(
        runs = {}
        # convert json back to EvalResult structures, which requires
        # special handling for datetime and our dataclasses.
        for name, entry in data["runs"].items():
            compilation = entry.get("compilation")
            runs[name] = EvalResult(
                start=datetime.datetime.fromisoformat(entry["start"]),
                end=datetime.datetime.fromisoformat(entry["end"]),
                compilation=(
                    CompileResult(**compilation) if compilation is not None else None
                ),
                run=RunResult(**entry["run"]),
            )

        system = SystemInfo(**data.get("system", {}))
        return FullResult(success=True, error="", runs=runs, system=system)
6 changes: 6 additions & 0 deletions src/discord-cluster-manager/standalone-init.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
#!/usr/bin/env bash
# Prepare a host for the standalone runner: install pip and uv, create a
# virtual environment, and install the Python dependencies into it.

# apt-get with -y so an unattended run does not stop at a confirmation prompt.
apt-get install -y python3-pip
pip install uv --break-system-packages
uv venv
source .venv/bin/activate
uv pip install -r requirements.txt
uv pip install torch numpy
53 changes: 53 additions & 0 deletions src/discord-cluster-manager/standalone-runner.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
import asyncio
import os
from dataclasses import asdict
from pydantic import BaseModel

import uvicorn

from run_eval import run_config

from fastapi import FastAPI, HTTPException

app = FastAPI()


# Held for the duration of a run so that only one submission executes at a time.
_serial_run = asyncio.Semaphore(1)
# Shared secret that each request must present. It is read from the
# RUNNER_TOKEN environment variable; when that is unset or empty the value
# stays None, which no string token equals, so every request is rejected.
_runner_token = os.environ.get("RUNNER_TOKEN") or None


# JSON body accepted by the /run endpoint.
class RunRequest(BaseModel):
    # Run configuration that is handed to run_config.
    config: dict
    # Compared against the runner's own token before anything is run.
    token: str


@app.post("/run")
async def run(request: RunRequest) -> dict:
    """Run the submitted config and return its result as a plain dict.

    Raises:
        HTTPException: With status 401 when ``request.token`` does not equal
            the module-level ``_runner_token``.
    """
    if request.token != _runner_token:
        raise HTTPException(status_code=401, detail="Invalid token")
    # only one submission can run at any given time
    async with _serial_run:
        # run_config is a synchronous call; executing it in a worker thread
        # keeps the event loop free to answer other requests meanwhile.
        result = await asyncio.to_thread(run_config, request.config)
    return asdict(result)


async def run_server(port):
    """Serve ``app`` with uvicorn on all interfaces at ``port``."""
    server = uvicorn.Server(
        uvicorn.Config(
            app,
            host="0.0.0.0",
            port=port,
            log_level="info",
            limit_concurrency=2,
        )
    )

    # serve() is awaited directly, so uvicorn runs on the event loop that is
    # already driving this coroutine.
    await server.serve()


def main():
    """Start the server on the port given by ``PORT``, or 8000 if it is unset or empty."""
    port = int(os.environ.get("PORT") or 8000)
    asyncio.run(run_server(port=port))


if __name__ == "__main__":
    main()
Loading