From e5bd2fccd7d50e2870aa6eeaf106f28be18e36de Mon Sep 17 00:00:00 2001 From: ouyangyu Date: Wed, 11 Aug 2021 10:35:53 +0800 Subject: [PATCH 1/3] add extract gpt result script --- .../GPT/extract_gpt_result.py | 75 +++++++++++++++++++ OneFlow/LanguageModeling/GPT/extract_util.py | 69 +++++++++++++++++ 2 files changed, 144 insertions(+) create mode 100644 OneFlow/LanguageModeling/GPT/extract_gpt_result.py create mode 100644 OneFlow/LanguageModeling/GPT/extract_util.py diff --git a/OneFlow/LanguageModeling/GPT/extract_gpt_result.py b/OneFlow/LanguageModeling/GPT/extract_gpt_result.py new file mode 100644 index 00000000..127c26a3 --- /dev/null +++ b/OneFlow/LanguageModeling/GPT/extract_gpt_result.py @@ -0,0 +1,75 @@ +import os +import argparse +from extract_util import extract_result + + +parser = argparse.ArgumentParser(description="flags for BERT benchmark") +parser.add_argument( + "--benchmark_log_dir", type=str, default="./logs/oneflow", + required=False) +parser.add_argument("--start_iter", type=int, default=300) +parser.add_argument("--end_iter", type=int, default=400) +parser.add_argument("--print_mode", type=str, default='markdown') +args = parser.parse_args() + + +def extract_info_from_file(log_file): + ''' + num_nodes ....................................... 1 + num_gpus_per_node ............................... 8 + data_parallel_size .............................. 1 + tensor_model_parallel_size ...................... 8 + pipeline_model_parallel_size .................... 1 + global_batch_size ............................... 32 + micro_batch_size ................................ 32 + num_accumulation_steps .......................... 1 + num_layers ...................................... 16 + hidden_size ..................................... 2304 + num_attention_heads ............................. 16 + seq_length ...................................... 2048 + Training... + | step | micro_batches | samples | throughput | latency | loss | + | -------- | --------------- | --------------- | ---------- | ---------- | ---------- | + | 1 | 1 | 32 | 3.65895 | 8.74569 | 11.27187 | + | 2 | 2 | 64 | 5.92391 | 5.40183 | 22.54614 | + | 3 | 3 | 96 | 33.08657 | 0.96716 | 33.82825 | + | 4 | 4 | 128 | 32.91274 | 0.97227 | 45.10602 | + | 5 | 5 | 160 | 33.05942 | 0.96795 | 56.36795 | + | 6 | 6 | 192 | 32.97452 | 0.97045 | 67.64371 | + | 7 | 7 | 224 | 32.75634 | 0.97691 | 78.92993 | + | 8 | 8 | 256 | 33.13264 | 0.96581 | 90.20315 | + | 9 | 9 | 288 | 33.01570 | 0.96924 | 101.47802 | + utilization.gpu [%], memory.used [MiB] + 100 %, 13858 MiB + 100 %, 13994 MiB + 100 %, 13994 MiB + 100 %, 13994 MiB + 100 %, 13994 MiB + 93 %, 13994 MiB + 100 %, 14102 MiB + 100 %, 13850 MiB + ''' + # extract info from file name + # print('extract file:',log_file) + result_dict = {} + with open(log_file, 'r') as f: + for line in f.readlines(): + ss = line.split(' ') + if len(ss) == 5 and ss[2] in ['num_nodes', 'num_gpus_per_node', 'data_parallel_size','tensor_model_parallel_size','pipeline_model_parallel_size','micro_batch_size','global_batch_size','num_accumulation_steps','num_layers','hidden_size','num_attention_heads','seq_length']: + result_dict[ss[2]] = ss[-1].strip() + elif len(ss) == 4 and 'MiB' in line and 'utilization' not in line: + memory_userd = int(ss[-2]) + if 'memory' not in result_dict.keys() or result_dict['memory'] < memory_userd: + result_dict['memory'] = memory_userd + + ss = line.split('|') + if len(ss) == 8 and "loss" not in line and "-" not in line: + tmp_line = ''.join(line.split(' ')).split('|') + result_dict['throughput_{}'.format(tmp_line[1])] = float(tmp_line[4]) + result_dict['latency_{}'.format(tmp_line[1])] = float(tmp_line[5])*1000 + + return result_dict + + +if __name__ == "__main__": + extract_result(args, extract_info_from_file) diff --git a/OneFlow/LanguageModeling/GPT/extract_util.py b/OneFlow/LanguageModeling/GPT/extract_util.py new file mode 100644 index 00000000..29c48a56 --- /dev/null +++ b/OneFlow/LanguageModeling/GPT/extract_util.py @@ -0,0 +1,69 @@ +import os +import glob +from statistics import median + + +def compute_throughput(result_dict, args): + throughput = 0 + latency = 0 + for i in range(args.start_iter,args.end_iter): + throughput += result_dict['throughput_{}'.format(i)] + latency += result_dict['latency_{}'.format(i)] + + + return latency / (args.end_iter - args.start_iter), throughput / (args.end_iter - args.start_iter) + + +def get_mode_print(mode): + def mode_print(lst): + if mode == 'markdown': + print('|', ' | '.join(('{:.2f}' if type(v) is float else '{}').format(v) for v in lst), '|') + else: + print(','.join(('{:.2f}' if type(v) is float else '{}').format(v) for v in lst)) + return mode_print + + +def extract_result(args, extract_func): + mode_print = get_mode_print(args.print_mode) + logs_list = glob.glob(os.path.join(args.benchmark_log_dir, "*/*.log")) + logs_list = sorted(logs_list) + + throughput_final_result_dict = {} + memory_final_result_dict = {} + lantency_final_result_dict = {} + print("## All Results") + header_list = ['case', 'memory','lantency','throuthput(sample/sec)'] + mode_print(header_list) + if args.print_mode == 'markdown': + mode_print(['--------' for _ in range(4)]) + for l in logs_list: + result_dict = extract_func(l) + lantency, throughput = compute_throughput(result_dict, args) + case = "{num_nodes}n{num_gpus_per_node}g_dp{data_parallel_size}_mp{tensor_model_parallel_size}_pp{pipeline_model_parallel_size}_mbs{micro_batch_size}_gbs{global_batch_size}_na{num_accumulation_steps}_l{num_layers}_hs{hidden_size}_nah{num_attention_heads}_sl{seq_length}".format(**result_dict) + mode_print([case, "{} (MiB)".format(result_dict['memory']), "{} (ms)".format(round(lantency,2)), throughput]) + + if case in throughput_final_result_dict: + throughput_final_result_dict[case].append(throughput) + memory_final_result_dict[case].append(result_dict['memory']) + lantency_final_result_dict[case].append(lantency) + else: + throughput_final_result_dict[case] = [throughput] + memory_final_result_dict[case] = [result_dict['memory']] + lantency_final_result_dict[case] = [lantency] + + # calculate median throughput and speedup + final_result_list = [] + for k, v in throughput_final_result_dict.items(): + final_result_list.append([k,max(memory_final_result_dict[k]),median(lantency_final_result_dict[k]),median(v)]) + + # sort final_result_list + #final_result_list = sorted(final_result_list, key=lambda x: (-x[2], x[0], x[1])) + + # print results + print("## Filtered Result `median value`") + mode_print(['case', 'memory (MiB)','lantency (ms)','throuthput(sample/sec)']) + if args.print_mode == 'markdown': + mode_print(['--------' for _ in range(5)]) + for res in final_result_list: + mode_print(res) + From 56e14e850c858fe605c2c7be1c1645da28e38a8c Mon Sep 17 00:00:00 2001 From: ouyangyu Date: Wed, 11 Aug 2021 11:48:59 +0800 Subject: [PATCH 2/3] code format --- .../GPT/extract_gpt_result.py | 52 +++++++++----- OneFlow/LanguageModeling/GPT/extract_util.py | 71 +++++++++++++------ 2 files changed, 86 insertions(+), 37 deletions(-) diff --git a/OneFlow/LanguageModeling/GPT/extract_gpt_result.py b/OneFlow/LanguageModeling/GPT/extract_gpt_result.py index 127c26a3..00d6e3b7 100644 --- a/OneFlow/LanguageModeling/GPT/extract_gpt_result.py +++ b/OneFlow/LanguageModeling/GPT/extract_gpt_result.py @@ -1,20 +1,20 @@ import os import argparse -from extract_util import extract_result +from extract_util import extract_result parser = argparse.ArgumentParser(description="flags for BERT benchmark") parser.add_argument( - "--benchmark_log_dir", type=str, default="./logs/oneflow", - required=False) + "--benchmark_log_dir", type=str, default="./logs/oneflow", required=False +) parser.add_argument("--start_iter", type=int, default=300) parser.add_argument("--end_iter", type=int, default=400) -parser.add_argument("--print_mode", type=str, default='markdown') +parser.add_argument("--print_mode", type=str, default="markdown") args = parser.parse_args() def extract_info_from_file(log_file): - ''' + """ num_nodes ....................................... 1 num_gpus_per_node ............................... 8 data_parallel_size .............................. 1 @@ -27,6 +27,7 @@ def extract_info_from_file(log_file): hidden_size ..................................... 2304 num_attention_heads ............................. 16 seq_length ...................................... 2048 + log_interval .................................... 1 Training... | step | micro_batches | samples | throughput | latency | loss | | -------- | --------------- | --------------- | ---------- | ---------- | ---------- | @@ -48,25 +49,44 @@ def extract_info_from_file(log_file): 93 %, 13994 MiB 100 %, 14102 MiB 100 %, 13850 MiB - ''' + """ # extract info from file name # print('extract file:',log_file) result_dict = {} - with open(log_file, 'r') as f: + with open(log_file, "r") as f: for line in f.readlines(): - ss = line.split(' ') - if len(ss) == 5 and ss[2] in ['num_nodes', 'num_gpus_per_node', 'data_parallel_size','tensor_model_parallel_size','pipeline_model_parallel_size','micro_batch_size','global_batch_size','num_accumulation_steps','num_layers','hidden_size','num_attention_heads','seq_length']: + ss = line.split(" ") + if len(ss) == 5 and ss[2] in [ + "num_nodes", + "num_gpus_per_node", + "data_parallel_size", + "tensor_model_parallel_size", + "pipeline_model_parallel_size", + "micro_batch_size", + "global_batch_size", + "num_accumulation_steps", + "num_layers", + "hidden_size", + "num_attention_heads", + "seq_length", + "log_interval", + ]: result_dict[ss[2]] = ss[-1].strip() - elif len(ss) == 4 and 'MiB' in line and 'utilization' not in line: + elif len(ss) == 4 and "MiB" in line and "utilization" not in line: memory_userd = int(ss[-2]) - if 'memory' not in result_dict.keys() or result_dict['memory'] < memory_userd: - result_dict['memory'] = memory_userd + if ( + "memory" not in result_dict.keys() + or result_dict["memory"] < memory_userd + ): + result_dict["memory"] = memory_userd - ss = line.split('|') + ss = line.split("|") if len(ss) == 8 and "loss" not in line and "-" not in line: - tmp_line = ''.join(line.split(' ')).split('|') - result_dict['throughput_{}'.format(tmp_line[1])] = float(tmp_line[4]) - result_dict['latency_{}'.format(tmp_line[1])] = float(tmp_line[5])*1000 + tmp_line = "".join(line.split(" ")).split("|") + result_dict["throughput_{}".format(tmp_line[1])] = float(tmp_line[4]) + result_dict["latency_{}".format(tmp_line[1])] = ( + float(tmp_line[5]) * 1000 + ) return result_dict diff --git a/OneFlow/LanguageModeling/GPT/extract_util.py b/OneFlow/LanguageModeling/GPT/extract_util.py index 29c48a56..fa55834c 100644 --- a/OneFlow/LanguageModeling/GPT/extract_util.py +++ b/OneFlow/LanguageModeling/GPT/extract_util.py @@ -6,20 +6,34 @@ def compute_throughput(result_dict, args): throughput = 0 latency = 0 - for i in range(args.start_iter,args.end_iter): - throughput += result_dict['throughput_{}'.format(i)] - latency += result_dict['latency_{}'.format(i)] + log_interval = int(result_dict["log_interval"]) + for i in range(args.start_iter, args.end_iter + log_interval, log_interval): + throughput += result_dict["throughput_{}".format(i)] + latency += result_dict["latency_{}".format(i)] - - return latency / (args.end_iter - args.start_iter), throughput / (args.end_iter - args.start_iter) + return ( + latency / (args.end_iter - args.start_iter), + throughput / (args.end_iter - args.start_iter), + ) def get_mode_print(mode): def mode_print(lst): - if mode == 'markdown': - print('|', ' | '.join(('{:.2f}' if type(v) is float else '{}').format(v) for v in lst), '|') + if mode == "markdown": + print( + "|", + " | ".join( + ("{:.2f}" if type(v) is float else "{}").format(v) for v in lst + ), + "|", + ) else: - print(','.join(('{:.2f}' if type(v) is float else '{}').format(v) for v in lst)) + print( + ",".join( + ("{:.2f}" if type(v) is float else "{}").format(v) for v in lst + ) + ) + return mode_print @@ -32,38 +46,53 @@ def extract_result(args, extract_func): memory_final_result_dict = {} lantency_final_result_dict = {} print("## All Results") - header_list = ['case', 'memory','lantency','throuthput(sample/sec)'] + header_list = ["case", "memory", "lantency", "throuthput(sample/sec)"] mode_print(header_list) - if args.print_mode == 'markdown': - mode_print(['--------' for _ in range(4)]) + if args.print_mode == "markdown": + mode_print(["--------" for _ in range(4)]) for l in logs_list: result_dict = extract_func(l) lantency, throughput = compute_throughput(result_dict, args) - case = "{num_nodes}n{num_gpus_per_node}g_dp{data_parallel_size}_mp{tensor_model_parallel_size}_pp{pipeline_model_parallel_size}_mbs{micro_batch_size}_gbs{global_batch_size}_na{num_accumulation_steps}_l{num_layers}_hs{hidden_size}_nah{num_attention_heads}_sl{seq_length}".format(**result_dict) - mode_print([case, "{} (MiB)".format(result_dict['memory']), "{} (ms)".format(round(lantency,2)), throughput]) + case = "{num_nodes}n{num_gpus_per_node}g_dp{data_parallel_size}_mp{tensor_model_parallel_size}_pp{pipeline_model_parallel_size}_mbs{micro_batch_size}_gbs{global_batch_size}_na{num_accumulation_steps}_l{num_layers}_hs{hidden_size}_nah{num_attention_heads}_sl{seq_length}".format( + **result_dict + ) + mode_print( + [ + case, + "{} (MiB)".format(result_dict["memory"]), + "{} (ms)".format(round(lantency, 2)), + throughput, + ] + ) if case in throughput_final_result_dict: throughput_final_result_dict[case].append(throughput) - memory_final_result_dict[case].append(result_dict['memory']) + memory_final_result_dict[case].append(result_dict["memory"]) lantency_final_result_dict[case].append(lantency) else: throughput_final_result_dict[case] = [throughput] - memory_final_result_dict[case] = [result_dict['memory']] + memory_final_result_dict[case] = [result_dict["memory"]] lantency_final_result_dict[case] = [lantency] # calculate median throughput and speedup final_result_list = [] for k, v in throughput_final_result_dict.items(): - final_result_list.append([k,max(memory_final_result_dict[k]),median(lantency_final_result_dict[k]),median(v)]) + final_result_list.append( + [ + k, + max(memory_final_result_dict[k]), + median(lantency_final_result_dict[k]), + median(v), + ] + ) # sort final_result_list - #final_result_list = sorted(final_result_list, key=lambda x: (-x[2], x[0], x[1])) + # final_result_list = sorted(final_result_list, key=lambda x: (-x[2], x[0], x[1])) # print results print("## Filtered Result `median value`") - mode_print(['case', 'memory (MiB)','lantency (ms)','throuthput(sample/sec)']) - if args.print_mode == 'markdown': - mode_print(['--------' for _ in range(5)]) + mode_print(["case", "memory (MiB)", "lantency (ms)", "throuthput(sample/sec)"]) + if args.print_mode == "markdown": + mode_print(["--------" for _ in range(5)]) for res in final_result_list: mode_print(res) - From 1d13526c9a437824831df4e3c5f1e4dfc112933e Mon Sep 17 00:00:00 2001 From: ouyangyu Date: Fri, 10 Sep 2021 14:39:14 +0800 Subject: [PATCH 3/3] refine --- OneFlow/LanguageModeling/GPT/extract_gpt_result.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/OneFlow/LanguageModeling/GPT/extract_gpt_result.py b/OneFlow/LanguageModeling/GPT/extract_gpt_result.py index 00d6e3b7..348bf499 100644 --- a/OneFlow/LanguageModeling/GPT/extract_gpt_result.py +++ b/OneFlow/LanguageModeling/GPT/extract_gpt_result.py @@ -3,7 +3,7 @@ from extract_util import extract_result -parser = argparse.ArgumentParser(description="flags for BERT benchmark") +parser = argparse.ArgumentParser(description="flags for GPT benchmark") parser.add_argument( "--benchmark_log_dir", type=str, default="./logs/oneflow", required=False )