Skip to content

Commit 7559e3f

Browse files
authored
chore(pegboard-serverless): auto-close runner ws (#3212)
* chore(epoxy): update peer urls when manually reconfiguring replicas
* chore(engine): move guard to api public service kind
* chore(pegboard-serverless): auto-close runner ws
1 parent 67a2ccf commit 7559e3f

File tree

5 files changed

+92
-40
lines changed

5 files changed

+92
-40
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

packages/core/pegboard-serverless/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ rivet-types.workspace = true
1818
rivet-util.workspace = true
1919
tracing.workspace = true
2020
universaldb.workspace = true
21+
universalpubsub.workspace = true
2122
vbare.workspace = true
2223

2324
namespace.workspace = true

packages/core/pegboard-serverless/src/lib.rs

Lines changed: 65 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ use rivet_types::runner_configs::RunnerConfigKind;
1919
use tokio::{sync::oneshot, task::JoinHandle, time::Duration};
2020
use universaldb::options::StreamingMode;
2121
use universaldb::utils::IsolationLevel::*;
22+
use universalpubsub::PublishOpts;
2223
use vbare::OwnedVersionedData;
2324

2425
const X_RIVET_ENDPOINT: HeaderName = HeaderName::from_static("x-rivet-endpoint");
@@ -27,6 +28,8 @@ const X_RIVET_TOTAL_SLOTS: HeaderName = HeaderName::from_static("x-rivet-total-s
2728
const X_RIVET_RUNNER_NAME: HeaderName = HeaderName::from_static("x-rivet-runner-name");
2829
const X_RIVET_NAMESPACE_ID: HeaderName = HeaderName::from_static("x-rivet-namespace-id");
2930

31+
const DRAIN_GRACE_PERIOD: Duration = Duration::from_secs(10);
32+
3033
struct OutboundConnection {
3134
handle: JoinHandle<()>,
3235
shutdown_tx: oneshot::Sender<()>,
@@ -377,12 +380,14 @@ async fn outbound_handler(
377380
anyhow::Ok(())
378381
};
379382

383+
let sleep_until_drop = request_lifespan.saturating_sub(DRAIN_GRACE_PERIOD);
380384
tokio::select! {
381385
res = stream_handler => return res.map_err(Into::into),
382-
_ = tokio::time::sleep(request_lifespan) => {}
386+
_ = tokio::time::sleep(sleep_until_drop) => {}
383387
_ = shutdown_rx => {}
384388
}
385389

390+
// Stop runner
386391
draining.store(true, Ordering::SeqCst);
387392

388393
ctx.msg(rivet_types::msgs::pegboard::BumpServerlessAutoscaler {})
@@ -394,34 +399,56 @@ async fn outbound_handler(
394399
}
395400

396401
// Continue waiting on req while draining
397-
while let Some(event) = source.next().await {
398-
match event {
399-
Ok(sse::Event::Open) => {}
400-
Ok(sse::Event::Message(msg)) => {
401-
tracing::debug!(%msg.data, "received outbound req message");
402-
403-
// If runner_id is none at this point it means we did not send the stopping signal yet, so
404-
// send it now
405-
if runner_id.is_none() {
406-
let data = BASE64.decode(msg.data).context("invalid base64 message")?;
407-
let payload =
402+
let wait_for_shutdown_fut = async move {
403+
while let Some(event) = source.next().await {
404+
match event {
405+
Ok(sse::Event::Open) => {}
406+
Ok(sse::Event::Message(msg)) => {
407+
tracing::debug!(%msg.data, "received outbound req message");
408+
409+
// If runner_id is none at this point it means we did not send the stopping signal yet, so
410+
// send it now
411+
if runner_id.is_none() {
412+
let data = BASE64.decode(msg.data).context("invalid base64 message")?;
413+
let payload =
408414
protocol::versioned::ToServerlessServer::deserialize_with_embedded_version(
409415
&data,
410416
)
411417
.context("invalid payload")?;
412418

413-
match payload {
414-
protocol::ToServerlessServer::ToServerlessServerInit(init) => {
415-
let runner_id =
416-
Id::parse(&init.runner_id).context("invalid runner id")?;
417-
stop_runner(ctx, runner_id).await?;
419+
match payload {
420+
protocol::ToServerlessServer::ToServerlessServerInit(init) => {
421+
let runner_id_local =
422+
Id::parse(&init.runner_id).context("invalid runner id")?;
423+
runner_id = Some(runner_id_local);
424+
stop_runner(ctx, runner_id_local).await?;
425+
}
418426
}
419427
}
420428
}
429+
Err(sse::Error::StreamEnded) => break,
430+
Err(err) => return Err(err.into()),
421431
}
422-
Err(sse::Error::StreamEnded) => break,
423-
Err(err) => return Err(err.into()),
424432
}
433+
434+
Result::<()>::Ok(())
435+
};
436+
437+
// Wait for runner to shut down
438+
tokio::select! {
439+
res = wait_for_shutdown_fut => return res.map_err(Into::into),
440+
_ = tokio::time::sleep(DRAIN_GRACE_PERIOD) => {
441+
tracing::debug!("reached drain grace period before runner shut down")
442+
}
443+
444+
}
445+
446+
// Close connection
447+
//
448+
// This will force the runner to stop the request in order to avoid hitting the serverless
449+
// timeout threshold
450+
if let Some(runner_id) = runner_id {
451+
publish_to_client_stop(ctx, runner_id).await?;
425452
}
426453

427454
tracing::debug!("outbound req stopped");
@@ -454,3 +481,22 @@ async fn stop_runner(ctx: &StandaloneCtx, runner_id: Id) -> Result<()> {
454481

455482
Ok(())
456483
}
484+
485+
/// Send a stop message to the client.
486+
///
487+
/// This will close the runner's WebSocket.
488+
async fn publish_to_client_stop(ctx: &StandaloneCtx, runner_id: Id) -> Result<()> {
489+
let receiver_subject =
490+
pegboard::pubsub_subjects::RunnerReceiverSubject::new(runner_id).to_string();
491+
492+
let message_serialized = rivet_runner_protocol::versioned::ToClient::latest(
493+
rivet_runner_protocol::ToClient::ToClientClose,
494+
)
495+
.serialize_with_embedded_version(rivet_runner_protocol::PROTOCOL_VERSION)?;
496+
497+
ctx.ups()?
498+
.publish(&receiver_subject, &message_serialized, PublishOpts::one())
499+
.await?;
500+
501+
Ok(())
502+
}

packages/infra/engine/src/commands/start.rs

Lines changed: 23 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ pub struct Opts {
1414

1515
/// Exclude the specified services instead of including them
1616
#[arg(long)]
17-
exclude_services: bool,
17+
except_services: Vec<ServiceKind>,
1818
}
1919

2020
#[derive(clap::ValueEnum, Clone, PartialEq)]
@@ -55,34 +55,37 @@ impl Opts {
5555
}
5656

5757
// Select services to run
58-
let services = if self.services.is_empty() {
58+
let services = if self.services.is_empty() && self.except_services.is_empty() {
5959
// Run all services
6060
run_config.services.clone()
61+
} else if !self.except_services.is_empty() {
62+
// Exclude specified services
63+
let except_service_kinds = self
64+
.except_services
65+
.iter()
66+
.map(|x| x.clone().into())
67+
.collect::<Vec<rivet_service_manager::ServiceKind>>();
68+
69+
run_config
70+
.services
71+
.iter()
72+
.filter(|x| !except_service_kinds.iter().any(|y| y.eq(&x.kind)))
73+
.cloned()
74+
.collect::<Vec<_>>()
6175
} else {
62-
// Filter services
76+
// Include only specified services
6377
let service_kinds = self
6478
.services
6579
.iter()
6680
.map(|x| x.clone().into())
6781
.collect::<Vec<rivet_service_manager::ServiceKind>>();
6882

69-
if self.exclude_services {
70-
// Exclude specified services
71-
run_config
72-
.services
73-
.iter()
74-
.filter(|x| !service_kinds.iter().any(|y| y.eq(&x.kind)))
75-
.cloned()
76-
.collect::<Vec<_>>()
77-
} else {
78-
// Include only specified services
79-
run_config
80-
.services
81-
.iter()
82-
.filter(|x| service_kinds.iter().any(|y| y.eq(&x.kind)))
83-
.cloned()
84-
.collect::<Vec<_>>()
85-
}
83+
run_config
84+
.services
85+
.iter()
86+
.filter(|x| service_kinds.iter().any(|y| y.eq(&x.kind)))
87+
.cloned()
88+
.collect::<Vec<_>>()
8689
};
8790

8891
// Start server

packages/infra/engine/src/run_config.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@ pub fn config(_rivet_config: rivet_config::Config) -> Result<RunConfigData> {
1919
}),
2020
Service::new(
2121
"pegboard_serverless",
22-
ServiceKind::Standalone,
22+
// There should only be one of these, since it's auto-scaling requests
23+
ServiceKind::Singleton,
2324
|config, pools| Box::pin(pegboard_serverless::start(config, pools)),
2425
),
2526
Service::new(

0 commit comments

Comments
 (0)