This repository has been archived by the owner on Sep 18, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1.8k
Support save and open experiments #2750
Merged
Merged
Changes from all commits
Commits
Show all changes
27 commits
Select commit
Hold shift + click to select a range
dcd2ffd
Merge pull request #251 from microsoft/master
SparkSnail 3b8b6fb
Merge pull request #252 from microsoft/master
SparkSnail 916e444
Merge pull request #253 from microsoft/master
SparkSnail caeffb8
Merge pull request #254 from microsoft/master
SparkSnail 57c300e
Merge pull request #255 from microsoft/master
SparkSnail 65660e6
Merge pull request #257 from microsoft/master
SparkSnail 9376d6a
Merge pull request #258 from microsoft/master
SparkSnail 5fef3cf
Merge pull request #259 from microsoft/master
SparkSnail 5544ae8
Merge pull request #261 from microsoft/master
SparkSnail f9fdfee
Merge pull request #262 from microsoft/master
SparkSnail c5e26ef
add trial job detail link
SparkSnail 10a04ba
Merge branch 'master' of https://github.com/SparkSnail/nni
SparkSnail 60c888f
init
SparkSnail aa64fe6
Merge pull request #263 from microsoft/master
SparkSnail 0a42e9a
Merge branch 'master' of https://github.com/SparkSnail/nni into dev-n…
SparkSnail d68f73f
update command
SparkSnail c6a5f8c
Merge pull request #264 from microsoft/master
SparkSnail 81facec
Merge branch 'master' of https://github.com/SparkSnail/nni into dev-n…
SparkSnail 806ce57
add doc
SparkSnail 2f3fb7a
fix pylint
SparkSnail dfdcf1f
fix pylint
SparkSnail 576bfe9
change args to nnictl experiment load
SparkSnail aa708ca
fix pylint
SparkSnail 0080891
fix comments
SparkSnail 4ada470
fix comments
SparkSnail 07ab608
fix comments
SparkSnail e9d0ebc
fix comments
SparkSnail File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -18,9 +18,9 @@ | |
from .rest_utils import rest_get, rest_delete, check_rest_server_quick, check_response | ||
from .url_utils import trial_jobs_url, experiment_url, trial_job_id_url, export_data_url | ||
from .config_utils import Config, Experiments | ||
from .constants import NNICTL_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \ | ||
from .constants import NNICTL_HOME_DIR, NNI_HOME_DIR, EXPERIMENT_INFORMATION_FORMAT, EXPERIMENT_DETAIL_FORMAT, \ | ||
EXPERIMENT_MONITOR_INFO, TRIAL_MONITOR_HEAD, TRIAL_MONITOR_CONTENT, TRIAL_MONITOR_TAIL, REST_TIME_OUT | ||
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content | ||
from .common_utils import print_normal, print_error, print_warning, detect_process, get_yml_content, generate_temp_dir | ||
from .command_utils import check_output_command, kill_command | ||
from .ssh_utils import create_ssh_sftp_client, remove_remote_directory | ||
|
||
|
@@ -736,3 +736,165 @@ def search_space_auto_gen(args): | |
print_warning('Expected search space file \'{}\' generated, but not found.'.format(file_path)) | ||
else: | ||
print_normal('Generate search space done: \'{}\'.'.format(file_path)) | ||
|
||
def save_experiment(args):
    '''Save the data of a stopped experiment to a zip file.

    Reads the following attributes from ``args``:

    * ``id`` - id of the experiment to save (required).
    * ``path`` - optional output directory; it is created when missing.
      Without it the archive is written relative to the current directory.
    * ``saveCodeDir`` - when truthy, the trial code directory is archived too.

    The archive is named ``nni_experiment_<id>.zip`` and contains:

    * ``experiment/`` - a copy of the experiment log directory,
    * ``nnictl/.experiment`` - the nnictl record of the experiment as JSON,
      with the experiment id added under the ``id`` key,
    * ``nnictl/<fileName>/`` - a copy of the nnictl config folder,
    * ``code/`` - a copy of the trial code directory (only with ``saveCodeDir``).

    Prints an error and exits with status 1 when the id is missing or unknown,
    the experiment is not in ``STOPPED`` status, the log directory does not
    exist, or the metadata file cannot be written.
    '''
    experiment_config = Experiments()
    experiment_dict = experiment_config.get_all_experiments()
    if args.id is None:
        print_error('Please set experiment id.')
        exit(1)
    if args.id not in experiment_dict:
        print_error('Cannot find experiment {0}.'.format(args.id))
        exit(1)
    experiment_info = experiment_dict[args.id]
    if experiment_info.get('status') != 'STOPPED':
        print_error('Can only save stopped experiment!')
        exit(1)
    print_normal('Saving...')
    nni_config = Config(experiment_info['fileName'])
    logDir = os.path.join(NNI_HOME_DIR, args.id)
    if nni_config.get_config('logDir'):
        logDir = os.path.join(nni_config.get_config('logDir'), args.id)
    # Validate before asking for a temp folder so that this failure path has
    # nothing to clean up.
    if not os.path.exists(logDir):
        print_error('logDir: %s does not exist!' % logDir)
        exit(1)

    temp_root_dir = generate_temp_dir()
    try:
        # Step1. Copy logDir to temp folder
        temp_experiment_dir = os.path.join(temp_root_dir, 'experiment')
        shutil.copytree(logDir, temp_experiment_dir)

        # Step2. Copy nnictl metadata to temp folder
        temp_nnictl_dir = os.path.join(temp_root_dir, 'nnictl')
        os.makedirs(temp_nnictl_dir, exist_ok=True)
        metadata_path = os.path.join(temp_nnictl_dir, '.experiment')
        # Work on a copy: adding the id must not alter the dict handed out by
        # Experiments.get_all_experiments().
        experiment_metadata = dict(experiment_info)
        experiment_metadata['id'] = args.id
        try:
            with open(metadata_path, 'w') as file:
                json.dump(experiment_metadata, file)
        except IOError:
            print_error('Write file to %s failed!' % metadata_path)
            exit(1)
        nnictl_config_dir = os.path.join(NNICTL_HOME_DIR, experiment_info['fileName'])
        shutil.copytree(nnictl_config_dir, os.path.join(temp_nnictl_dir, experiment_info['fileName']))

        # Step3. Copy code dir
        if args.saveCodeDir:
            temp_code_dir = os.path.join(temp_root_dir, 'code')
            shutil.copytree(nni_config.get_config('experimentConfig')['trial']['codeDir'], temp_code_dir)

        # Step4. Archive folder
        zip_package_name = 'nni_experiment_%s' % args.id
        if args.path:
            # Expand '~' the same way load_experiment does for its archive path.
            output_dir = os.path.expanduser(args.path)
            os.makedirs(output_dir, exist_ok=True)
            zip_package_name = os.path.join(output_dir, zip_package_name)
        shutil.make_archive(zip_package_name, 'zip', temp_root_dir)
        print_normal('Save to %s.zip success!' % zip_package_name)
    finally:
        # Step5. Cleanup temp data. This also runs when exit(1) is called or
        # an exception is raised above, so no temp folder is left behind.
        shutil.rmtree(temp_root_dir, ignore_errors=True)
|
||
def load_experiment(args):
    '''Load experiment data from an archive created by ``save_experiment``.

    Reads the following attributes from ``args``:

    * ``path`` - path of the archive file; ``~`` is expanded.
    * ``codeDir`` - existing directory that becomes the trial code directory
      of the loaded experiment; ``~`` is expanded and a relative path is
      resolved against the current working directory.
    * ``logDir`` - optional existing directory that receives the experiment
      data. Without it the ``logDir`` stored in the experiment config is used,
      falling back to ``NNI_HOME_DIR``.

    The archive must contain ``experiment/db``, ``nnictl/.experiment`` and the
    nnictl config folder named by the ``fileName`` field of the metadata.
    An existing nnictl config folder or experiment folder with the same name
    is replaced. Files from the archive's ``code`` folder are copied into
    ``codeDir`` unless an entry of the same name already exists there.

    Prints an error and exits with status 1 when a path does not exist, the
    archive is incomplete, the metadata is invalid, or the experiment id is
    already registered.
    '''
    package_path = os.path.expanduser(args.path)
    # Check the expanded path: that is the one that gets unpacked below.
    if not os.path.exists(package_path):
        print_error('file path %s does not exist!' % args.path)
        exit(1)
    temp_root_dir = generate_temp_dir()
    try:
        shutil.unpack_archive(package_path, temp_root_dir)
        print_normal('Loading...')

        # Step1. Validation
        # Expand '~' before checking so the check matches the path used in Step4.
        codeDir = os.path.expanduser(args.codeDir)
        if not os.path.exists(codeDir):
            print_error('Invalid: codeDir path does not exist!')
            exit(1)
        if args.logDir and not os.path.exists(args.logDir):
            print_error('Invalid: logDir path does not exist!')
            exit(1)
        experiment_temp_dir = os.path.join(temp_root_dir, 'experiment')
        if not os.path.exists(os.path.join(experiment_temp_dir, 'db')):
            print_error('Invalid archive file: db file does not exist!')
            exit(1)
        nnictl_temp_dir = os.path.join(temp_root_dir, 'nnictl')
        metadata_path = os.path.join(nnictl_temp_dir, '.experiment')
        if not os.path.exists(metadata_path):
            print_error('Invalid archive file: nnictl metadata file does not exist!')
            exit(1)
        try:
            with open(metadata_path, 'r') as file:
                experiment_metadata = json.load(file)
        except ValueError as err:
            print_error('Invalid nnictl metadata file: %s' % err)
            exit(1)
        if not isinstance(experiment_metadata, dict):
            print_error('Invalid nnictl metadata file: %s' % 'content is not a JSON object')
            exit(1)
        experiment_id = experiment_metadata.get('id')
        file_name = experiment_metadata.get('fileName')
        # Both values come from the archive and are joined onto local
        # directories (one of which may be removed with rmtree below), so they
        # must be plain, non-empty folder names.
        for key, value in (('id', experiment_id), ('fileName', file_name)):
            is_plain_name = (isinstance(value, str)
                             and value not in ('', '.', '..')
                             and os.path.basename(value) == value)
            if not is_plain_name:
                print_error('Invalid nnictl metadata file: %s' % ('bad or missing field %s' % key))
                exit(1)
        experiment_config = Experiments()
        experiment_dict = experiment_config.get_all_experiments()
        if experiment_id in experiment_dict:
            print_error('Invalid: experiment id already exist!')
            exit(1)
        nnictl_src_path = os.path.join(nnictl_temp_dir, file_name)
        if not os.path.exists(nnictl_src_path):
            print_error('Invalid: experiment metadata does not exist!')
            exit(1)

        # Step2. Copy nnictl metadata
        nnictl_dest_path = os.path.join(NNICTL_HOME_DIR, file_name)
        if os.path.exists(nnictl_dest_path):
            shutil.rmtree(nnictl_dest_path)
        shutil.copytree(nnictl_src_path, nnictl_dest_path)

        # Step3. Copy experiment data
        nni_config = Config(file_name)
        nnictl_exp_config = nni_config.get_config('experimentConfig')
        if args.logDir:
            logDir = args.logDir
            nnictl_exp_config['logDir'] = logDir
        elif nnictl_exp_config.get('logDir'):
            logDir = nnictl_exp_config['logDir']
        else:
            logDir = NNI_HOME_DIR
        # The archived 'experiment' folder is stored under the experiment id.
        experiment_dest_path = os.path.join(logDir, experiment_id)
        if os.path.exists(experiment_dest_path):
            shutil.rmtree(experiment_dest_path)
        shutil.copytree(experiment_temp_dir, experiment_dest_path)

        # Step4. Copy code dir
        if not os.path.isabs(codeDir):
            codeDir = os.path.join(os.getcwd(), codeDir)
            print_normal('Expand codeDir to %s' % codeDir)
        nnictl_exp_config['trial']['codeDir'] = codeDir
        archive_code_dir = os.path.join(temp_root_dir, 'code')
        if os.path.exists(archive_code_dir):
            # Copy entry by entry instead of one copytree over the whole
            # folder: codeDir already exists (validated in Step1) and entries
            # that are already present there must be kept, not overwritten.
            for file_name_in_code in os.listdir(archive_code_dir):
                src_path = os.path.join(archive_code_dir, file_name_in_code)
                target_path = os.path.join(codeDir, file_name_in_code)
                if os.path.exists(target_path):
                    print_error('Copy %s failed, %s exist!' % (file_name_in_code, target_path))
                    continue
                if os.path.isdir(src_path):
                    shutil.copytree(src_path, target_path)
                else:
                    shutil.copy(src_path, target_path)

        # Step5. Create experiment metadata
        nni_config.set_config('experimentConfig', nnictl_exp_config)
        experiment_config.add_experiment(experiment_id,
                                         experiment_metadata.get('port'),
                                         experiment_metadata.get('startTime'),
                                         file_name,
                                         experiment_metadata.get('platform'),
                                         experiment_metadata.get('experimentName'),
                                         experiment_metadata.get('endTime'),
                                         experiment_metadata.get('status'))
        print_normal('Load experiment %s success!' % experiment_id)
    finally:
        # Step6. Cleanup temp data. This also runs when exit(1) is called or
        # an exception is raised above, so the unpacked archive never lingers.
        shutil.rmtree(temp_root_dir, ignore_errors=True)
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible that the dir already exists?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The dir name is generated by the
random()
function, so logically it should not already exist. I also added logic to handle this scenario: if the dir exists, nni regenerates the dir name until it finds one that does not exist yet.