@@ -294,7 +294,9 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> Result<()>
294
294
ctx,
295
295
input. actor_id ,
296
296
state. generation ,
297
- state. runner_workflow_id ,
297
+ state. runner_workflow_id . context (
298
+ "should have runner_workflow_id set if sleeping" ,
299
+ ) ?,
298
300
)
299
301
. await ?;
300
302
}
@@ -311,7 +313,9 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> Result<()>
311
313
ctx,
312
314
input. actor_id ,
313
315
state. generation ,
314
- state. runner_workflow_id ,
316
+ state. runner_workflow_id . context (
317
+ "should have runner_workflow_id set if stopping" ,
318
+ ) ?,
315
319
)
316
320
. await ?;
317
321
}
@@ -330,7 +334,9 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> Result<()>
330
334
. await ?;
331
335
332
336
ctx. msg ( Ready {
333
- runner_id : state. runner_id ,
337
+ runner_id : state
338
+ . runner_id
339
+ . context ( "should have runner_id set if running" ) ?,
334
340
} )
335
341
. tag ( "actor_id" , input. actor_id )
336
342
. send ( )
@@ -355,20 +361,28 @@ pub async fn pegboard_actor(ctx: &mut WorkflowCtx, input: &Input) -> Result<()>
355
361
}
356
362
}
357
363
Main :: Wake ( _sig) => {
358
- // Ignore wake if we are not sleeping. This is expected to happen under certain
359
- // circumstances.
360
364
if state. sleeping {
361
- state. alarm_ts = None ;
362
- state. sleeping = false ;
363
-
364
- if runtime:: reschedule_actor ( ctx, & input, state) . await ? {
365
- // Destroyed early
366
- return Ok ( Loop :: Break ( runtime:: LifecycleRes {
367
- generation : state. generation ,
368
- // False here because if we received the destroy signal, it is
369
- // guaranteed that we did not allocate another actor.
370
- kill : false ,
371
- } ) ) ;
365
+ if state. runner_id . is_none ( ) {
366
+ state. alarm_ts = None ;
367
+ state. sleeping = false ;
368
+ state. will_wake = false ;
369
+
370
+ if runtime:: reschedule_actor ( ctx, & input, state) . await ? {
371
+ // Destroyed early
372
+ return Ok ( Loop :: Break ( runtime:: LifecycleRes {
373
+ generation : state. generation ,
374
+ // False here because if we received the destroy signal, it is
375
+ // guaranteed that we did not allocate another actor.
376
+ kill : false ,
377
+ } ) ) ;
378
+ }
379
+ } else {
380
+ state. will_wake = true ;
381
+
382
+ tracing:: debug!(
383
+ actor_id=?input. actor_id,
384
+ "cannot wake an actor that intends to sleep but has not stopped yet, deferring wake until after stop" ,
385
+ ) ;
372
386
}
373
387
} else {
374
388
tracing:: debug!(
@@ -447,27 +461,30 @@ async fn handle_stopped(
447
461
) -> Result < Option < runtime:: LifecycleRes > > {
448
462
tracing:: debug!( ?code, "actor stopped" ) ;
449
463
450
- // Reset retry count
464
+ // Reset retry count on successful exit
451
465
if let Some ( protocol:: StopCode :: Ok ) = code {
452
466
state. reschedule_state = Default :: default ( ) ;
453
467
}
454
468
469
+ // Clear stop gc timeout to prevent being marked as lost in the lifecycle loop
455
470
state. gc_timeout_ts = None ;
471
+ state. runner_id = None ;
472
+ state. runner_workflow_id = None ;
456
473
457
474
ctx. activity ( runtime:: DeallocateInput {
458
475
actor_id : input. actor_id ,
459
476
} )
460
477
. await ?;
461
478
462
- // Allocate other pending actors from queue
479
+ // Allocate other pending actors from queue since a slot has now cleared
463
480
let res = ctx
464
481
. activity ( AllocatePendingActorsInput {
465
482
namespace_id : input. namespace_id ,
466
483
name : input. runner_name_selector . clone ( ) ,
467
484
} )
468
485
. await ?;
469
486
470
- // Dispatch pending allocs
487
+ // Dispatch pending allocs (if any)
471
488
for alloc in res. allocations {
472
489
ctx. signal ( alloc. signal )
473
490
. to_workflow :: < Workflow > ( )
@@ -476,6 +493,7 @@ async fn handle_stopped(
476
493
. await ?;
477
494
}
478
495
496
+ // Handle rescheduling if not marked as sleeping
479
497
if !state. sleeping {
480
498
let failed = matches ! ( code, None | Some ( protocol:: StopCode :: Error ) ) ;
481
499
@@ -487,7 +505,9 @@ async fn handle_stopped(
487
505
ctx,
488
506
input. actor_id ,
489
507
state. generation ,
490
- state. runner_workflow_id ,
508
+ state
509
+ . runner_workflow_id
510
+ . context ( "should have runner_workflow_id set if not sleeping" ) ?,
491
511
)
492
512
. await ?;
493
513
}
@@ -531,6 +551,20 @@ async fn handle_stopped(
531
551
}
532
552
}
533
553
}
554
+ // Rewake actor immediately after stopping if `will_wake` was set
555
+ else if state. will_wake {
556
+ state. will_wake = false ;
557
+
558
+ if runtime:: reschedule_actor ( ctx, & input, state) . await ? {
559
+ // Destroyed early
560
+ return Ok ( Some ( runtime:: LifecycleRes {
561
+ generation : state. generation ,
562
+ // False here because if we received the destroy signal, it is
563
+ // guaranteed that we did not allocate another actor.
564
+ kill : false ,
565
+ } ) ) ;
566
+ }
567
+ }
534
568
535
569
Ok ( None )
536
570
}
0 commit comments