diff --git a/dashboard/modules/job/tests/test_job_manager.py b/dashboard/modules/job/tests/test_job_manager.py index 952caa96c26c..df3cf76ebedf 100644 --- a/dashboard/modules/job/tests/test_job_manager.py +++ b/dashboard/modules/job/tests/test_job_manager.py @@ -231,6 +231,32 @@ async def _run_hanging_command(job_manager, tmp_dir, start_signal_actor=None): return pid_file, tmp_file, job_id +@pytest.mark.asyncio +async def test_monitor_job_pending(job_manager): + """Test that monitor_job does not error when the job is PENDING.""" + + # Create a signal actor to keep the job pending. + start_signal_actor = SignalActor.remote() + + # Submit a job. + job_id = await job_manager.submit_job( + entrypoint="echo 'hello world'", + _start_signal_actor=start_signal_actor, + ) + + # Trigger _recover_running_jobs while the job is still pending. This + # will pick up the new pending job. + await job_manager._recover_running_jobs() + + # Trigger the job to start. + ray.get(start_signal_actor.send.remote()) + + # Wait for the job to finish. + await async_wait_for_condition_async_predicate( + check_job_succeeded, job_manager=job_manager, job_id=job_id + ) + + async def check_job_succeeded(job_manager, job_id): data = await job_manager.get_job_info(job_id) status = data.status