diff --git a/.dev_scripts/benchmark/benchmark_cfg.yaml b/.dev_scripts/benchmark/benchmark_cfg.yaml index c75dcb5ebb..261d435c08 100644 --- a/.dev_scripts/benchmark/benchmark_cfg.yaml +++ b/.dev_scripts/benchmark/benchmark_cfg.yaml @@ -24,7 +24,7 @@ model_list: checkpoint: https://download.openmmlab.com/mmpose/top_down/rsn/rsn18_coco_256x192-72f4b4a7_20201127.pth # ViPNAS ## ViPNAS + COCO - - config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/s_vipnas_res50_coco_256x192.py + - config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py checkpoint: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth # HRNetV2 ## HRNetV2 + AFLW @@ -70,7 +70,7 @@ model_list: # CPM ## CPM + COCO - config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py - checkpoint: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_256x192-aa4ba095_20200817. + checkpoint: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_256x192-aa4ba095_20200817.pth ## CPM + JHMDB - config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py checkpoint: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth diff --git a/.dev_scripts/benchmark/benchmark_regression.py b/.dev_scripts/benchmark/benchmark_regression.py index c21856f9b4..57d9323c5d 100644 --- a/.dev_scripts/benchmark/benchmark_regression.py +++ b/.dev_scripts/benchmark/benchmark_regression.py @@ -9,10 +9,22 @@ import mmcv +DEFAULT_TEST_ARGS = dict( + gpus=1, + gpus_per_node=1, + cpus_per_task=5, +) + +DEFAULT_TRAIN_ARGS = dict( + gpus=8, + gpus_per_node=8, + cpus_per_task=5, +) + def is_port_available(port, host='127.0.0.1'): - """check whether a port is in use return True if the port is available else - False.""" + """check whether a port is in use, return True if the port is available + else False.""" s = None try: s = socket.socket(socket.AF_INET, 
socket.SOCK_STREAM) @@ -29,41 +41,48 @@ def is_port_available(port, host='127.0.0.1'): def parse_args(): parser = argparse.ArgumentParser( description='running benchmark regression with tmux') + parser.add_argument( + '--partition', + '-p', + help='the slurm partition name') + parser.add_argument( '--config', + '-c', help='test config file path', - default='./.dev_scripts/benchmark/benchmark_regression_cfg.yaml') + default='./.dev_scripts/benchmark/benchmark_cfg.yaml') + parser.add_argument( + '--mode', + help='the benchmark regression mode, can be "test" or "train"', + default='test') + parser.add_argument( '--priority', - nargs=2, type=int, - help='largest priority for test and train tasks respectively', - default=[3, 3]) + help='models with priority higher or equal to this will be included', + default=2) # runtime setting parameters parser.add_argument( - '--root-work-dir', help='the root working directory to store logs') + '--root-work-dir', + '-r', + help='the root working directory to store logs') parser.add_argument( - '--session-name', '-s', help='the tmux session name', default='test') + '--session-name', + '-s', + help='the tmux session name', + default='benchmark_regression') parser.add_argument( '--panes-per-window', + '-w', type=int, help='the maximum number of panes in each tmux window', default=12) parser.add_argument( '--env', + '-e', help='the conda environment used to run the tasks', default='pt1.6') - parser.add_argument( - '--partition', help='the partition name', default='mm_human') - parser.add_argument('--gpus', help='the total number of GPUs', default=8) - parser.add_argument( - '--gpus-per-node', - default=8, - help='the number of GPUs used per computing node', - choices=[1, 2, 3, 4, 5, 6, 7, 8]) - parser.add_argument( - '--cpus-per-task', default=5, help='the number of CPUs used per task') args = parser.parse_args() return args @@ -76,22 +95,15 @@ def main(): # get the current time stamp now = 
datetime.now() ts = now.strftime('%Y_%m_%d_%H_%M') - args.root_work_dir = f'work_dirs/benchmark_regression_{ts}' + args.root_work_dir = f'work_dirs/benchmark_regression_{args.mode}_{ts}' mmcv.mkdir_or_exist(osp.abspath(args.root_work_dir)) cfg = mmcv.load(args.config) - # priority for test and train tasks respectively - prio_test, prio_train = args.priority - prio = max(prio_test, prio_train) + 1 - - # number of benchmark regression tasks + # number of tasks num_task = 0 - for i in range(prio): - if i <= prio_test: - num_task += len(cfg['model_list'][f'P{i}']) - if i <= prio_train: - num_task += len(cfg['model_list'][f'P{i}']) + for i in range(args.priority + 1): + num_task += len(cfg['model_list'][f'P{i}']) # number of windows need to be created num_win = math.ceil(num_task / args.panes_per_window) @@ -123,30 +135,34 @@ def main(): os.system(f'tmux split-window -h -p {ratio}') os.system('tmux select-layout tiled') - # the initial number of task - cur_task = 1 - # get the hostname hostname = socket.gethostname() - print('hostname: ', hostname) + print('Hostname: ', hostname) # get the host ip ip = socket.gethostbyname(hostname) - print('ip: ', ip) + print('IP: ', ip) + # the initial number of task + cur_task = 1 # initialize a starting port cur_port = 29500 - for i in range(prio): + for i in range(args.priority + 1): models = cfg['model_list'][f'P{i}'] - # modes = ['test','train'] - modes = [] - if i <= prio_test: - modes.append('test') - if i <= prio_train: - modes.append('train') - for model in models: + # select the window and pane + cur_win = int(math.ceil(cur_task / args.panes_per_window)) + os.system('tmux select-window -t 0') + os.system(f'tmux select-window -t win_{cur_win}') + cur_pane = (cur_task - 1) % args.panes_per_window + os.system(f'tmux select-pane -t {cur_pane}') + + cmd = f'conda activate {args.env}' + os.system(f'tmux send-keys "{cmd}" "C-m"') + cmd = f'echo executing task: {cur_task}' + os.system(f'tmux send-keys "{cmd}" "C-m"') + cur_config = 
model['config'] cur_checkpoint = model['checkpoint'] @@ -155,88 +171,98 @@ def main(): else: task_name = osp.splitext(osp.basename(cur_config))[0] - for mode in modes: - # select the window and pane - cur_win = int(math.ceil(cur_task / args.panes_per_window)) - os.system('tmux select-window -t 0') - os.system(f'tmux select-window -t win_{cur_win}') - cur_pane = (cur_task - 1) % args.panes_per_window - os.system(f'tmux select-pane -t {cur_pane}') + cur_task_name = args.mode + '_' + task_name + cur_work_dir = osp.join(args.root_work_dir, cur_task_name) + + # if the port is used, use a random number for port + while not is_port_available(cur_port, ip): + cur_port = random.randint(1000, 50000) + + if args.mode == 'test': + cur_gpus = model['test']['gpus'] if 'test' in model.keys( + ) and 'gpus' in model['test'].keys( + ) else DEFAULT_TEST_ARGS['gpus'] + cur_gpus_per_node = model['test'][ + 'gpus_per_node'] if 'test' in model.keys( + ) and 'gpus_per_node' in model['test'].keys( + ) else DEFAULT_TEST_ARGS['gpus_per_node'] + cur_cpus_per_task = model['test'][ + 'cpus_per_task'] if 'test' in model.keys( + ) and 'cpus_per_task' in model['test'].keys( + ) else DEFAULT_TEST_ARGS['cpus_per_task'] + cur_partition = model['test'][ + 'partition'] if 'test' in model.keys( + ) and 'partition' in model['test'].keys( + ) else args.partition - cmd = f'conda activate {args.env}' - os.system(f'tmux send-keys "{cmd}" "C-m"') - cmd = f'echo executing task: {cur_task}' + # deal with extra python arguments + py_cmd = f' --work-dir {cur_work_dir} ' + + if 'test' in model.keys() and 'py_args' in model['test'].keys( + ): + keys = list(model['test']['py_args'].keys()) + values = list(model['test']['py_args'].values()) + + for k in range(len(keys)): + if values[k] is None: + if keys[k] in ['fuse_conv_bn', 'gpu_collect']: + py_cmd += f' --{keys[k]} ' + else: + py_cmd += f' --{keys[k]} {values[k]} ' + cmd = f'MASTER_PORT={cur_port} GPUS={cur_gpus} ' + \ + f'GPUS_PER_NODE={cur_gpus_per_node} ' + \ 
+ f'CPUS_PER_TASK={cur_cpus_per_task} ' + \ + f'./tools/slurm_test.sh {cur_partition} ' + \ + f'{cur_task_name} ' + \ + f'{cur_config} {cur_checkpoint} ' + \ + f'{py_cmd}' os.system(f'tmux send-keys "{cmd}" "C-m"') - cur_partition = model[mode][ - 'partition'] if 'partition' in model[mode].keys( + else: + cur_gpus = model['train']['gpus'] if 'train' in model.keys( + ) and 'gpus' in model['train'].keys( + ) else DEFAULT_TRAIN_ARGS['gpus'] + cur_gpus_per_node = model['train'][ + 'gpus_per_node'] if 'train' in model.keys( + ) and 'gpus_per_node' in model['train'].keys( + ) else DEFAULT_TRAIN_ARGS['gpus_per_node'] + cur_cpus_per_task = model['train'][ + 'cpus_per_task'] if 'train' in model.keys( + ) and 'cpus_per_task' in model['train'].keys( + ) else DEFAULT_TRAIN_ARGS['cpus_per_task'] + cur_partition = model['train'][ + 'partition'] if 'train' in model.keys( + ) and 'partition' in model['train'].keys( ) else args.partition - cur_gpus = model[mode]['gpus'] if 'gpus' in model[mode].keys( - ) else args.gpus - cur_gpus_per_node = model[mode][ - 'gpus_per_node'] if 'gpus_per_node' in model[mode].keys( - ) else args.gpus_per_node - cur_cpus_per_task = model[mode][ - 'cpus_per_task'] if 'cpus_per_task' in model[mode].keys( - ) else args.cpus_per_task - - cur_task_name = mode + '_' + task_name - cur_work_dir = osp.join(args.root_work_dir, cur_task_name) - - if mode == 'test': - # deal with extra python arguments - py_cmd = f' --work-dir {cur_work_dir} ' - if 'py_args' in model[mode].keys(): - keys = list(model[mode]['py_args'].keys()) - values = list(model[mode]['py_args'].values()) - - for k in range(len(keys)): - if values[k] is None: - if keys[k] in ['fuse_conv_bn', 'gpu_collect']: - py_cmd += f' --{keys[k]} ' - else: - py_cmd += f' --{keys[k]} {values[k]} ' - cmd = f'MASTER_PORT={cur_port} GPUS={cur_gpus} ' + \ - f'GPUS_PER_NODE={cur_gpus_per_node} ' + \ - f'CPUS_PER_TASK={cur_cpus_per_task} ' + \ - f'./tools/slurm_test.sh {cur_partition} ' + \ - f'{cur_task_name} ' + \ 
- f'{cur_config} {cur_checkpoint} ' + \ - f'{py_cmd}' - - os.system(f'tmux send-keys "{cmd}" "C-m"') - - else: - py_cmd = ' ' - # deal with extra python arguments - if 'py_args' in model[mode].keys(): - keys = list(model[mode]['py_args'].keys()) - values = list(model[mode]['py_args'].values()) - - for k in range(len(keys)): - if values[k] is None: - if keys[k] in [ - 'no-validate', 'deterministic', - 'autoscale-lr' - ]: - py_cmd += f' --{keys[k]} ' - else: - py_cmd += f' --{keys[k]} {values[k]} ' - cmd = f'MASTER_PORT={cur_port} GPUS={cur_gpus} ' + \ - f'GPUS_PER_NODE={cur_gpus_per_node} ' + \ - f'CPUS_PER_TASK={cur_cpus_per_task} ' + \ - f'./tools/slurm_train.sh {cur_partition} ' + \ - f'{cur_task_name} ' + \ - f'{cur_config} {cur_work_dir} ' + \ - f'{py_cmd}' - os.system(f'tmux send-keys "{cmd}" "C-m"') - - cur_port += 1 - # if the port is used, use a random number for port - while not is_port_available(cur_port, ip): - cur_port = random.randint(29000, 39000) - print(f'port used in task {cur_task} is: {cur_port}') - cur_task += 1 + + # deal with extra python arguments + py_cmd = ' ' + if 'train' in model.keys( + ) and 'py_args' in model['train'].keys(): + keys = list(model['train']['py_args'].keys()) + values = list(model['train']['py_args'].values()) + + for k in range(len(keys)): + if values[k] is None: + if keys[k] in [ + 'no-validate', 'deterministic', + 'autoscale-lr' + ]: + py_cmd += f' --{keys[k]} ' + else: + py_cmd += f' --{keys[k]} {values[k]} ' + cmd = f'MASTER_PORT={cur_port} GPUS={cur_gpus} ' + \ + f'GPUS_PER_NODE={cur_gpus_per_node} ' + \ + f'CPUS_PER_TASK={cur_cpus_per_task} ' + \ + f'./tools/slurm_train.sh {cur_partition} ' + \ + f'{cur_task_name} ' + \ + f'{cur_config} {cur_work_dir} ' + \ + f'{py_cmd}' + os.system(f'tmux send-keys "{cmd}" "C-m"') + + print(f'port used in task {cur_task} is: {cur_port}') + cur_task += 1 + cur_port += 1 # close the base window os.system('tmux select-window -t 0') diff --git 
a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md index f0d5babb0d..dc10b2bd7b 100644 --- a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md +++ b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md @@ -36,4 +36,4 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 da | Arch | Input Size | AP | AP50 | AP75 | AR | AR50 | ckpt | log | | :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: | -| [S-ViPNAS-Res50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/s_vipnas_res50_coco_256x192.py) | 256x192 | 0.711 | 0.893 | 0.789 | 0.769 | 0.769 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192_20210624.log.json) | +| [S-ViPNAS-Res50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py) | 256x192 | 0.711 | 0.893 | 0.789 | 0.769 | 0.769 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192_20210624.log.json) | diff --git a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml index 656cd5a6dd..f5c85c9231 100644 --- a/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml +++ b/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.yml @@ -8,11 +8,11 @@ Collections: - https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48 README: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md Models: -- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/s_vipnas_res50_coco_256x192.py +- 
Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py In Collection: vipnas_coco Metadata: Training Data: COCO - Name: body--2d_kpt_sview_rgb_img--topdown_heatmap--coco--s_vipnas_res50_coco_256x192 + Name: body--2d_kpt_sview_rgb_img--topdown_heatmap--coco--vipnas_res50_coco_256x192 Results: - Dataset: COCO Metrics: