diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts index 6e09afdebf..6dbb084ac7 100644 --- a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts +++ b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts @@ -89,6 +89,19 @@ class RemoteMachineTrainingService implements TrainingService { this.sshConnectionPromises = []; // initialize gpuScheduler this.gpuScheduler = new GPUScheduler(this.machineExecutorManagerMap); + if (this.trialConfig === undefined) { + throw new Error("trial config not initialized!"); + } + // Copy codeDir to remote machine + for (const [rmMeta, executorManager] of this.machineExecutorManagerMap.entries()) { + const executor: ShellExecutor = await executorManager.getExecutor(this.initExecutorId); + if (executor !== undefined) { + this.machineCopyExpCodeDirPromiseMap.set( + rmMeta, + executor.copyDirectoryToRemote(this.trialConfig.codeDir, executor.getRemoteCodePath(getExperimentId())) + ); + } + } } while (!this.stopping) { while (this.jobQueue.length > 0) { @@ -328,20 +341,8 @@ class RemoteMachineTrainingService implements TrainingService { try { // Validate to make sure codeDir doesn't have too many files await validateCodeDir(remoteMachineTrailConfig.codeDir); - // Copy codeDir to remote machine - for (const [rmMeta, executorManager] of this.machineExecutorManagerMap.entries()) { - const executor: ShellExecutor = await executorManager.getExecutor(this.initExecutorId); - if (executor !== undefined) { - this.machineCopyExpCodeDirPromiseMap.set( - rmMeta, - executor.copyDirectoryToRemote(remoteMachineTrailConfig.codeDir, executor.getRemoteCodePath(getExperimentId())) - ); - } - } - } catch (error) { this.log.error(error); - return Promise.reject(new Error(error)); } diff --git a/test/config/training_service.yml b/test/config/training_service.yml index 0ae24d1aaf..998d39abaa 100644 --- a/test/config/training_service.yml +++ b/test/config/training_service.yml @@ -10,7 +10,7 @@ kubeflow: kubeflowConfig: operator: tf-operator - apiVersion: v1alpha2 + apiVersion: v1 storage: azureStorage keyVault: vaultName: