Skip to content

Commit

Permalink
Modify the benchmark regression script (open-mmlab#885)
Browse files Browse the repository at this point in the history
* add default test and train args

* remove partition field from default args to the parser

* fix typo in config

* fix typo in model list
  • Loading branch information
liqikai9 authored Aug 31, 2021
1 parent 5f3c176 commit 0839f84
Show file tree
Hide file tree
Showing 4 changed files with 151 additions and 125 deletions.
4 changes: 2 additions & 2 deletions .dev_scripts/benchmark/benchmark_cfg.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ model_list:
checkpoint: https://download.openmmlab.com/mmpose/top_down/rsn/rsn18_coco_256x192-72f4b4a7_20201127.pth
# ViPNAS
## ViPNAS + COCO
- config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/s_vipnas_res50_coco_256x192.py
- config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py
checkpoint: https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth
# HRNetV2
## HRNetV2 + AFLW
Expand Down Expand Up @@ -70,7 +70,7 @@ model_list:
# CPM
## CPM + COCO
- config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/cpm_coco_256x192.py
checkpoint: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_256x192-aa4ba095_20200817.
checkpoint: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_coco_256x192-aa4ba095_20200817.pth
## CPM + JHMDB
- config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/jhmdb/cpm_jhmdb_sub1_368x368.py
checkpoint: https://download.openmmlab.com/mmpose/top_down/cpm/cpm_jhmdb_sub1_368x368-2d2585c9_20201122.pth
Expand Down
266 changes: 146 additions & 120 deletions .dev_scripts/benchmark/benchmark_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,22 @@

import mmcv

# Fallback resource settings for test-mode tasks, used when a model entry
# in the benchmark config does not override them (single GPU per task).
DEFAULT_TEST_ARGS = {
    'gpus': 1,
    'gpus_per_node': 1,
    'cpus_per_task': 5,
}

# Fallback resource settings for train-mode tasks, used when a model entry
# in the benchmark config does not override them (8 GPUs per task).
DEFAULT_TRAIN_ARGS = {
    'gpus': 8,
    'gpus_per_node': 8,
    'cpus_per_task': 5,
}


def is_port_available(port, host='127.0.0.1'):
"""check whether a port is in use return True if the port is available else
False."""
"""check whether a port is in use, return True if the port is available
else False."""
s = None
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
Expand All @@ -29,41 +41,48 @@ def is_port_available(port, host='127.0.0.1'):
def parse_args():
parser = argparse.ArgumentParser(
description='running benchmark regression with tmux')
parser.add_argument(
'--partition',
'-p',
help='models with priority higher or equal to this will be included')

parser.add_argument(
'--config',
'-c',
help='test config file path',
default='./.dev_scripts/benchmark/benchmark_regression_cfg.yaml')
default='./.dev_scripts/benchmark/benchmark_cfg.yaml')
parser.add_argument(
'--mode',
help='the benchmark regression mode, can be "test" or "train"',
default='test')

parser.add_argument(
'--priority',
nargs=2,
type=int,
help='largest priority for test and train tasks respectively',
default=[3, 3])
help='models with priority higher or equal to this will be included',
default=2)

# runtime setting parameters
parser.add_argument(
'--root-work-dir', help='the root working directory to store logs')
'--root-work-dir',
'-r',
help='the root working directory to store logs')
parser.add_argument(
'--session-name', '-s', help='the tmux session name', default='test')
'--session-name',
'-s',
help='the tmux session name',
default='benchmark_regression')
parser.add_argument(
'--panes-per-window',
'-w',
type=int,
help='the maximum number of panes in each tmux window',
default=12)
parser.add_argument(
'--env',
'-e',
help='the conda environment used to run the tasks',
default='pt1.6')
parser.add_argument(
'--partition', help='the partition name', default='mm_human')
parser.add_argument('--gpus', help='the total number of GPUs', default=8)
parser.add_argument(
'--gpus-per-node',
default=8,
help='the number of GPUs used per computing node',
choices=[1, 2, 3, 4, 5, 6, 7, 8])
parser.add_argument(
'--cpus-per-task', default=5, help='the number of CPUs used per task')

args = parser.parse_args()
return args
Expand All @@ -76,22 +95,15 @@ def main():
# get the current time stamp
now = datetime.now()
ts = now.strftime('%Y_%m_%d_%H_%M')
args.root_work_dir = f'work_dirs/benchmark_regression_{ts}'
args.root_work_dir = f'work_dirs/benchmark_regression_{args.mode}_{ts}'
mmcv.mkdir_or_exist(osp.abspath(args.root_work_dir))

cfg = mmcv.load(args.config)

# priority for test and train tasks respectively
prio_test, prio_train = args.priority
prio = max(prio_test, prio_train) + 1

# number of benchmark regression tasks
# number of tasks
num_task = 0
for i in range(prio):
if i <= prio_test:
num_task += len(cfg['model_list'][f'P{i}'])
if i <= prio_train:
num_task += len(cfg['model_list'][f'P{i}'])
for i in range(args.priority + 1):
num_task += len(cfg['model_list'][f'P{i}'])

# number of windows need to be created
num_win = math.ceil(num_task / args.panes_per_window)
Expand Down Expand Up @@ -123,30 +135,34 @@ def main():
os.system(f'tmux split-window -h -p {ratio}')
os.system('tmux select-layout tiled')

# the initial number of task
cur_task = 1

# get the hostname
hostname = socket.gethostname()
print('hostname: ', hostname)
print('Hostname: ', hostname)
# get the host ip
ip = socket.gethostbyname(hostname)
print('ip: ', ip)
print('IP: ', ip)

# the initial number of task
cur_task = 1
# initialize a starting port
cur_port = 29500

for i in range(prio):
for i in range(args.priority + 1):
models = cfg['model_list'][f'P{i}']

# modes = ['test','train']
modes = []
if i <= prio_test:
modes.append('test')
if i <= prio_train:
modes.append('train')

for model in models:
# select the window and pane
cur_win = int(math.ceil(cur_task / args.panes_per_window))
os.system('tmux select-window -t 0')
os.system(f'tmux select-window -t win_{cur_win}')
cur_pane = (cur_task - 1) % args.panes_per_window
os.system(f'tmux select-pane -t {cur_pane}')

cmd = f'conda activate {args.env}'
os.system(f'tmux send-keys "{cmd}" "C-m"')
cmd = f'echo executing task: {cur_task}'
os.system(f'tmux send-keys "{cmd}" "C-m"')

cur_config = model['config']
cur_checkpoint = model['checkpoint']

Expand All @@ -155,88 +171,98 @@ def main():
else:
task_name = osp.splitext(osp.basename(cur_config))[0]

for mode in modes:
# select the window and pane
cur_win = int(math.ceil(cur_task / args.panes_per_window))
os.system('tmux select-window -t 0')
os.system(f'tmux select-window -t win_{cur_win}')
cur_pane = (cur_task - 1) % args.panes_per_window
os.system(f'tmux select-pane -t {cur_pane}')
cur_task_name = args.mode + '_' + task_name
cur_work_dir = osp.join(args.root_work_dir, cur_task_name)

# if the port is used, use a random number for port
while not is_port_available(cur_port, ip):
cur_port = random.randint(1000, 50000)

if args.mode == 'test':
cur_gpus = model['test']['gpus'] if 'test' in model.keys(
) and 'gpus' in model['test'].keys(
) else DEFAULT_TEST_ARGS['gpus']
cur_gpus_per_node = model['test'][
'gpus_per_node'] if 'test' in model.keys(
) and 'gpus_per_node' in model['test'].keys(
) else DEFAULT_TEST_ARGS['gpus_per_node']
cur_cpus_per_task = model['test'][
'cpus_per_task'] if 'test' in model.keys(
) and 'cpus_per_task' in model['test'].keys(
) else DEFAULT_TEST_ARGS['cpus_per_task']
cur_partition = model['test'][
'partition'] if 'test' in model.keys(
) and 'partition' in model['test'].keys(
) else args.partition

cmd = f'conda activate {args.env}'
os.system(f'tmux send-keys "{cmd}" "C-m"')
cmd = f'echo executing task: {cur_task}'
# deal with extra python arguments
py_cmd = f' --work-dir {cur_work_dir} '

if 'test' in model.keys() and 'py_args' in model['test'].keys(
):
keys = list(model['test']['py_args'].keys())
values = list(model['test']['py_args'].values())

for k in range(len(keys)):
if values[k] is None:
if keys[k] in ['fuse_conv_bn', 'gpu_collect']:
py_cmd += f' --{keys[k]} '
else:
py_cmd += f' --{keys[k]} {values[k]} '
cmd = f'MASTER_PORT={cur_port} GPUS={cur_gpus} ' + \
f'GPUS_PER_NODE={cur_gpus_per_node} ' + \
f'CPUS_PER_TASK={cur_cpus_per_task} ' + \
f'./tools/slurm_test.sh {cur_partition} ' + \
f'{cur_task_name} ' + \
f'{cur_config} {cur_checkpoint} ' + \
f'{py_cmd}'
os.system(f'tmux send-keys "{cmd}" "C-m"')

cur_partition = model[mode][
'partition'] if 'partition' in model[mode].keys(
else:
cur_gpus = model['train']['gpus'] if 'train' in model.keys(
) and 'gpus' in model['train'].keys(
) else DEFAULT_TRAIN_ARGS['gpus']
cur_gpus_per_node = model['train'][
'gpus_per_node'] if 'train' in model.keys(
) and 'gpus_per_node' in model['train'].keys(
) else DEFAULT_TRAIN_ARGS['gpus_per_node']
cur_cpus_per_task = model['train'][
'cpus_per_task'] if 'train' in model.keys(
) and 'cpus_per_task' in model['train'].keys(
) else DEFAULT_TRAIN_ARGS['cpus_per_task']
cur_partition = model['train'][
'partition'] if 'train' in model.keys(
) and 'partition' in model['train'].keys(
) else args.partition
cur_gpus = model[mode]['gpus'] if 'gpus' in model[mode].keys(
) else args.gpus
cur_gpus_per_node = model[mode][
'gpus_per_node'] if 'gpus_per_node' in model[mode].keys(
) else args.gpus_per_node
cur_cpus_per_task = model[mode][
'cpus_per_task'] if 'cpus_per_task' in model[mode].keys(
) else args.cpus_per_task

cur_task_name = mode + '_' + task_name
cur_work_dir = osp.join(args.root_work_dir, cur_task_name)

if mode == 'test':
# deal with extra python arguments
py_cmd = f' --work-dir {cur_work_dir} '
if 'py_args' in model[mode].keys():
keys = list(model[mode]['py_args'].keys())
values = list(model[mode]['py_args'].values())

for k in range(len(keys)):
if values[k] is None:
if keys[k] in ['fuse_conv_bn', 'gpu_collect']:
py_cmd += f' --{keys[k]} '
else:
py_cmd += f' --{keys[k]} {values[k]} '
cmd = f'MASTER_PORT={cur_port} GPUS={cur_gpus} ' + \
f'GPUS_PER_NODE={cur_gpus_per_node} ' + \
f'CPUS_PER_TASK={cur_cpus_per_task} ' + \
f'./tools/slurm_test.sh {cur_partition} ' + \
f'{cur_task_name} ' + \
f'{cur_config} {cur_checkpoint} ' + \
f'{py_cmd}'

os.system(f'tmux send-keys "{cmd}" "C-m"')

else:
py_cmd = ' '
# deal with extra python arguments
if 'py_args' in model[mode].keys():
keys = list(model[mode]['py_args'].keys())
values = list(model[mode]['py_args'].values())

for k in range(len(keys)):
if values[k] is None:
if keys[k] in [
'no-validate', 'deterministic',
'autoscale-lr'
]:
py_cmd += f' --{keys[k]} '
else:
py_cmd += f' --{keys[k]} {values[k]} '
cmd = f'MASTER_PORT={cur_port} GPUS={cur_gpus} ' + \
f'GPUS_PER_NODE={cur_gpus_per_node} ' + \
f'CPUS_PER_TASK={cur_cpus_per_task} ' + \
f'./tools/slurm_train.sh {cur_partition} ' + \
f'{cur_task_name} ' + \
f'{cur_config} {cur_work_dir} ' + \
f'{py_cmd}'
os.system(f'tmux send-keys "{cmd}" "C-m"')

cur_port += 1
# if the port is used, use a random number for port
while not is_port_available(cur_port, ip):
cur_port = random.randint(29000, 39000)
print(f'port used in task {cur_task} is: {cur_port}')
cur_task += 1

# deal with extra python arguments
py_cmd = ' '
if 'train' in model.keys(
) and 'py_args' in model['train'].keys():
keys = list(model['train']['py_args'].keys())
values = list(model['train']['py_args'].values())

for k in range(len(keys)):
if values[k] is None:
if keys[k] in [
'no-validate', 'deterministic',
'autoscale-lr'
]:
py_cmd += f' --{keys[k]} '
else:
py_cmd += f' --{keys[k]} {values[k]} '
cmd = f'MASTER_PORT={cur_port} GPUS={cur_gpus} ' + \
f'GPUS_PER_NODE={cur_gpus_per_node} ' + \
f'CPUS_PER_TASK={cur_cpus_per_task} ' + \
f'./tools/slurm_train.sh {cur_partition} ' + \
f'{cur_task_name} ' + \
f'{cur_config} {cur_work_dir} ' + \
f'{py_cmd}'
os.system(f'tmux send-keys "{cmd}" "C-m"')

print(f'port used in task {cur_task} is: {cur_port}')
cur_task += 1
cur_port += 1

# close the base window
os.system('tmux select-window -t 0')
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,4 +36,4 @@ Results on COCO val2017 with detector having human AP of 56.4 on COCO val2017 da

| Arch | Input Size | AP | AP<sup>50</sup> | AP<sup>75</sup> | AR | AR<sup>50</sup> | ckpt | log |
| :-------------- | :-----------: | :------: | :------: | :------: | :------: | :------: |:------: |:------: |
| [S-ViPNAS-Res50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/s_vipnas_res50_coco_256x192.py) | 256x192 | 0.711 | 0.893 | 0.789 | 0.769 | 0.769 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192_20210624.log.json) |
| [S-ViPNAS-Res50](/configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py) | 256x192 | 0.711 | 0.893 | 0.789 | 0.769 | 0.769 | [ckpt](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192-cc43b466_20210624.pth) | [log](https://download.openmmlab.com/mmpose/top_down/vipnas/vipnas_res50_coco_256x192_20210624.log.json) |
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,11 @@ Collections:
- https://link.springer.com/chapter/10.1007/978-3-319-10602-1_48
README: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_coco.md
Models:
- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/s_vipnas_res50_coco_256x192.py
- Config: configs/body/2d_kpt_sview_rgb_img/topdown_heatmap/coco/vipnas_res50_coco_256x192.py
In Collection: vipnas_coco
Metadata:
Training Data: COCO
Name: body--2d_kpt_sview_rgb_img--topdown_heatmap--coco--s_vipnas_res50_coco_256x192
Name: body--2d_kpt_sview_rgb_img--topdown_heatmap--coco--vipnas_res50_coco_256x192
Results:
- Dataset: COCO
Metrics:
Expand Down

0 comments on commit 0839f84

Please sign in to comment.