From e3f5c00b2b4ddc6f5b25b08d7aaa8bc0df8e1e86 Mon Sep 17 00:00:00 2001 From: gmm <38800877+mmglove@users.noreply.github.com> Date: Thu, 23 Sep 2021 15:25:33 +0800 Subject: [PATCH] change jp contents (#1090) * Revert "delete --privileged,test=document_fix (#1052)" This reverts commit f177e92ee13e8b39df1dd2553aeee6cb4e999ef2. * add OtherFrame demo * add OtherFrame demo * add OtherFrame demo * add OtherFrame demo,test=document_fix * add OtherFrame demo,test=document_fix * add OtherFrame demo,test=document_fix * add OtherFrame demo,test=document_fix * add OtherFrame demo,test=document_fix * add jp contents,test=document_fix * add jp contents,test=document_fix * change jp contents,test=document_fix * change jp contents,test=document_fix * change jp contents,test=document_fix * change jp contents,test=document_fix * change jp contents,test=document_fix --- OtherFrame/Speech/PyTorch/README_demo.md | 90 +++++++++++++ .../Speech/PyTorch/models/README_demo.md | 3 + .../Speech/PyTorch/scripts/README_demo.md | 39 ++++++ OtherFrame/clas/PyTorch/README_demo.md | 90 +++++++++++++ OtherFrame/clas/PyTorch/models/README_demo.md | 3 + .../clas/PyTorch/scripts/README_demo.md | 39 ++++++ OtherFrame/clas/mxnet/README_demo.md | 90 +++++++++++++ OtherFrame/clas/mxnet/models/README_demo.md | 3 + OtherFrame/clas/mxnet/scripts/README_demo.md | 39 ++++++ OtherFrame/detection/PyTorch/README_demo.md | 90 +++++++++++++ .../detection/PyTorch/models/README_demo.md | 3 + .../detection/PyTorch/scripts/README_demo.md | 39 ++++++ OtherFrame/gan/PyTorch/README_demo.md | 90 +++++++++++++ OtherFrame/gan/PyTorch/models/README_demo.md | 3 + OtherFrame/gan/PyTorch/scripts/README_demo.md | 39 ++++++ OtherFrame/nlp/PyTorch/PrepareEnv_demo.sh | 42 ++++++ OtherFrame/nlp/PyTorch/README_demo.md | 90 +++++++++++++ .../models/NLP_demo/nlp_modelName/train.py | 3 + OtherFrame/nlp/PyTorch/run_PyTorch_demo.sh | 25 ++++ .../scripts/NLP_demo/nlp_modelName/README.md | 121 ++++++++++++++++++ 
.../NLP_demo/nlp_modelName/analysis_log.py | 4 + .../scripts/NLP_demo/nlp_modelName/preData.sh | 4 + .../NLP_demo/nlp_modelName/run_benchmark.sh | 60 +++++++++ OtherFrame/ocr/PyTorch/README_demo.md | 90 +++++++++++++ OtherFrame/ocr/PyTorch/models/README_demo.md | 3 + OtherFrame/ocr/PyTorch/scripts/README_demo.md | 39 ++++++ OtherFrame/seg/PyTorch/README_demo.md | 90 +++++++++++++ OtherFrame/seg/PyTorch/models/README_demo.md | 3 + OtherFrame/seg/PyTorch/scripts/README_demo.md | 39 ++++++ OtherFrame/video/PyTorch/README_demo.md | 90 +++++++++++++ .../video/PyTorch/models/README_demo.md | 3 + .../video/PyTorch/scripts/README_demo.md | 39 ++++++ 32 files changed, 1405 insertions(+) create mode 100644 OtherFrame/Speech/PyTorch/README_demo.md create mode 100644 OtherFrame/Speech/PyTorch/models/README_demo.md create mode 100644 OtherFrame/Speech/PyTorch/scripts/README_demo.md create mode 100644 OtherFrame/clas/PyTorch/README_demo.md create mode 100644 OtherFrame/clas/PyTorch/models/README_demo.md create mode 100644 OtherFrame/clas/PyTorch/scripts/README_demo.md create mode 100644 OtherFrame/clas/mxnet/README_demo.md create mode 100644 OtherFrame/clas/mxnet/models/README_demo.md create mode 100644 OtherFrame/clas/mxnet/scripts/README_demo.md create mode 100644 OtherFrame/detection/PyTorch/README_demo.md create mode 100644 OtherFrame/detection/PyTorch/models/README_demo.md create mode 100644 OtherFrame/detection/PyTorch/scripts/README_demo.md create mode 100644 OtherFrame/gan/PyTorch/README_demo.md create mode 100644 OtherFrame/gan/PyTorch/models/README_demo.md create mode 100644 OtherFrame/gan/PyTorch/scripts/README_demo.md create mode 100755 OtherFrame/nlp/PyTorch/PrepareEnv_demo.sh create mode 100644 OtherFrame/nlp/PyTorch/README_demo.md create mode 100644 OtherFrame/nlp/PyTorch/models/NLP_demo/nlp_modelName/train.py create mode 100755 OtherFrame/nlp/PyTorch/run_PyTorch_demo.sh create mode 100644 OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/README.md create mode 
100644 OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/analysis_log.py create mode 100755 OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/preData.sh create mode 100755 OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/run_benchmark.sh create mode 100644 OtherFrame/ocr/PyTorch/README_demo.md create mode 100644 OtherFrame/ocr/PyTorch/models/README_demo.md create mode 100644 OtherFrame/ocr/PyTorch/scripts/README_demo.md create mode 100644 OtherFrame/seg/PyTorch/README_demo.md create mode 100644 OtherFrame/seg/PyTorch/models/README_demo.md create mode 100644 OtherFrame/seg/PyTorch/scripts/README_demo.md create mode 100644 OtherFrame/video/PyTorch/README_demo.md create mode 100644 OtherFrame/video/PyTorch/models/README_demo.md create mode 100644 OtherFrame/video/PyTorch/scripts/README_demo.md diff --git a/OtherFrame/Speech/PyTorch/README_demo.md b/OtherFrame/Speech/PyTorch/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/Speech/PyTorch/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash 
run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` +脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh sp 64 fp16 500; + " +# 启动镜像 +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash -c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/Speech/PyTorch/models/README_demo.md b/OtherFrame/Speech/PyTorch/models/README_demo.md new file mode 100644 index 
0000000000..7beff87dc3 --- /dev/null +++ b/OtherFrame/Speech/PyTorch/models/README_demo.md @@ -0,0 +1,3 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, +## models目录下存放模型代码,以以子库方式合入 diff --git a/OtherFrame/Speech/PyTorch/scripts/README_demo.md b/OtherFrame/Speech/PyTorch/scripts/README_demo.md new file mode 100644 index 0000000000..463728480a --- /dev/null +++ b/OtherFrame/Speech/PyTorch/scripts/README_demo.md @@ -0,0 +1,39 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/clas/PyTorch/README_demo.md b/OtherFrame/clas/PyTorch/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/clas/PyTorch/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── 
README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` +脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh sp 64 fp16 500; + " +# 启动镜像 +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash -c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── 
run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/clas/PyTorch/models/README_demo.md b/OtherFrame/clas/PyTorch/models/README_demo.md new file mode 100644 index 0000000000..7beff87dc3 --- /dev/null +++ b/OtherFrame/clas/PyTorch/models/README_demo.md @@ -0,0 +1,3 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, +## models目录下存放模型代码,以以子库方式合入 diff --git a/OtherFrame/clas/PyTorch/scripts/README_demo.md b/OtherFrame/clas/PyTorch/scripts/README_demo.md new file mode 100644 index 0000000000..463728480a --- /dev/null +++ b/OtherFrame/clas/PyTorch/scripts/README_demo.md @@ -0,0 +1,39 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh 
+"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/clas/mxnet/README_demo.md b/OtherFrame/clas/mxnet/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/clas/mxnet/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` +脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp 
/workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh sp 64 fp16 500; + " +# 启动镜像 +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash -c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/clas/mxnet/models/README_demo.md b/OtherFrame/clas/mxnet/models/README_demo.md new file mode 100644 index 0000000000..7beff87dc3 --- /dev/null +++ b/OtherFrame/clas/mxnet/models/README_demo.md @@ -0,0 +1,3 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, +## models目录下存放模型代码,以以子库方式合入 diff --git a/OtherFrame/clas/mxnet/scripts/README_demo.md b/OtherFrame/clas/mxnet/scripts/README_demo.md new 
file mode 100644 index 0000000000..463728480a --- /dev/null +++ b/OtherFrame/clas/mxnet/scripts/README_demo.md @@ -0,0 +1,39 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/detection/PyTorch/README_demo.md b/OtherFrame/detection/PyTorch/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/detection/PyTorch/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - 
CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` +脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh sp 64 fp16 500; + " +# 启动镜像 +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash -c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 
如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/detection/PyTorch/models/README_demo.md b/OtherFrame/detection/PyTorch/models/README_demo.md new file mode 100644 index 0000000000..7beff87dc3 --- /dev/null +++ b/OtherFrame/detection/PyTorch/models/README_demo.md @@ -0,0 +1,3 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, +## models目录下存放模型代码,以以子库方式合入 diff --git a/OtherFrame/detection/PyTorch/scripts/README_demo.md b/OtherFrame/detection/PyTorch/scripts/README_demo.md new file mode 100644 index 0000000000..463728480a --- /dev/null +++ b/OtherFrame/detection/PyTorch/scripts/README_demo.md @@ -0,0 +1,39 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 
具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/gan/PyTorch/README_demo.md b/OtherFrame/gan/PyTorch/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/gan/PyTorch/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` +脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh sp 64 fp16 500; + " +# 启动镜像 
+nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash -c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/gan/PyTorch/models/README_demo.md b/OtherFrame/gan/PyTorch/models/README_demo.md new file mode 100644 index 0000000000..7beff87dc3 --- /dev/null +++ b/OtherFrame/gan/PyTorch/models/README_demo.md @@ -0,0 +1,3 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, +## models目录下存放模型代码,以以子库方式合入 diff --git a/OtherFrame/gan/PyTorch/scripts/README_demo.md b/OtherFrame/gan/PyTorch/scripts/README_demo.md new file mode 100644 index 0000000000..463728480a --- /dev/null +++ b/OtherFrame/gan/PyTorch/scripts/README_demo.md @@ -0,0 +1,39 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── 
analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/nlp/PyTorch/PrepareEnv_demo.sh b/OtherFrame/nlp/PyTorch/PrepareEnv_demo.sh new file mode 100755 index 0000000000..1fef7d0e1a --- /dev/null +++ b/OtherFrame/nlp/PyTorch/PrepareEnv_demo.sh @@ -0,0 +1,42 @@ +#!/usr/bin/env bash + +## 注意,本脚本仅为示例,相关内容请勿更新到此 + +# 公共配置文件,配置python 安装pytorch,运行目录:/workspace (起容器的时候映射的目录:benchmark/OtherFrameworks/PyTorch/) +echo "*******prepare benchmark***********" +################################# 创建一些log目录,如: +export BENCHMARK_ROOT=/workspace # 起容器的时候映射的目录 benchmark/OtherFrameworks/PyTorch/ +log_date=`date "+%Y.%m%d.%H%M%S"` +frame=pytorch1.6 +cuda_version=10.1 +save_log_dir=${BENCHMARK_ROOT}/logs/${frame}_${log_date}_${cuda_version}/ + +if [[ -d ${save_log_dir} ]]; then + rm -rf ${save_log_dir} +fi +# this for update the log_path coding mat +export TRAIN_LOG_DIR=${save_log_dir}/train_log +mkdir -p ${TRAIN_LOG_DIR} + +log_path=${TRAIN_LOG_DIR} 
+################################# 配置python, 如: +rm -rf run_env +mkdir run_env +ln -s $(which python3.7) run_env/python +ln -s $(which pip3.7) run_env/pip +export PATH=/workspace/run_env:${PATH} + +################################# 安装框架 如: +pip install -U pip +echo `pip --version` +pip install torch==1.6.0 torchvision==0.7.0 -f https://download.pytorch.org/whl/torch_stable.html + +################################## 安装其他的公共依赖(单个模型的依赖在中设置,All_PyTorch_Models.sh 中),如: +pip install -i https://pypi.tuna.tsinghua.edu.cn/simple opencv-python +# dali install +pip install --extra-index-url https://developer.download.nvidia.com/compute/redist nvidia-dali-cuda$(echo ${cuda_version}|cut -d "." -f1)0 # note: dali 版本格式是cuda100 & cuda110 + +echo "*******prepare benchmark end***********" + + + diff --git a/OtherFrame/nlp/PyTorch/README_demo.md b/OtherFrame/nlp/PyTorch/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/nlp/PyTorch/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` 
+脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh sp 64 fp16 500; + " +# 启动镜像 +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash -c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/nlp/PyTorch/models/NLP_demo/nlp_modelName/train.py b/OtherFrame/nlp/PyTorch/models/NLP_demo/nlp_modelName/train.py new file mode 100644 index 
0000000000..6274c8ebd0 --- /dev/null +++ b/OtherFrame/nlp/PyTorch/models/NLP_demo/nlp_modelName/train.py @@ -0,0 +1,3 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python diff --git a/OtherFrame/nlp/PyTorch/run_PyTorch_demo.sh b/OtherFrame/nlp/PyTorch/run_PyTorch_demo.sh new file mode 100755 index 0000000000..de8ba9d108 --- /dev/null +++ b/OtherFrame/nlp/PyTorch/run_PyTorch_demo.sh @@ -0,0 +1,25 @@ +#!/usr/bin/env bash + +## NOTE: this script is only a template; do not commit model-specific content to it + +# Pull the benchmark image (ImageName is an empty placeholder and must be filled in before running) +ImageName= ; +docker pull "${ImageName}" + +# Commands executed inside the container to benchmark one model: a single-GPU run (sp), then an 8-GPU run (mp) +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh mp 64 fp16 500; + " +# Start the container with the current directory mounted at /workspace and execute run_cmd in it +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v "$PWD":/workspace \ + "${ImageName}" /bin/bash -c "${run_cmd}" + + diff --git a/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/README.md b/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/README.md new file mode 100644 index 0000000000..752784179a --- /dev/null +++ b/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/README.md @@ -0,0 +1,121 @@ + +# NGC PyTorch Bert 性能复现 + +此处给出了基于 [NGC PyTorch](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/LanguageModeling/BERT) 实现的 Bert Base Pre-Training 任务的详细复现流程,包括执行环境、PyTorch版本、环境搭建、复现脚本、测试结果和测试日志。 + + +## 目录 +- [一、环境介绍](#一环境介绍) + - [1.物理机环境](#1物理机环境) + - [2.Docker 镜像](#2docker-镜像) +- [二、环境搭建](#二环境搭建) + - [1. 单机(单卡、8卡)环境搭建](#1-单机单卡8卡环境搭建) +- [三、测试步骤](#三测试步骤) + - [1. 
单机(单卡、8卡)测试](#1-单机单卡8卡测试) +- [四、测试结果](#四测试结果) +- [五、日志数据](#五日志数据) + - [1.单机(单卡、8卡)日志](#1单机单卡8卡日志) + + +## 一、环境介绍 + +### 1.物理机环境(如每个框架用的一致可链接到标准环境) + +物理机环境,对 [NGC PyTorch](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/LanguageModeling/BERT) 的 Bert 模型进行了测试,详细物理机配置,见[Paddle Bert Base 性能测试](../../README.md#1.物理机环境)。 + +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-16GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6148 CPU @ 2.40GHz * 38 + - Driver Version: 460.32.03 + - 内存:502 GB + +- 多机(32卡) + - 系统:CentOS release 6.3 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 48 + - Driver Version: 450.80.02 + - 内存:502 GB + +### 2.Docker 镜像(如每个框架用的一致可链接到标准环境) + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [shell 脚本](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/LanguageModeling/BERT/scripts/docker/build.sh), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` +- **PyTorch 版本**: `1.6.0a0+9907a3e` +- **CUDA 版本**: `11.0.167` +- **cuDnn 版本**: `8.0.1` + +## 二、环境搭建 + +### 1. 
单机(单卡、8卡)环境搭建 + +我们遵循了 NGC PyTorch 官网提供的 [Quick Start Guide](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/LanguageModeling/BERT#quick-start-guide) 教程搭建了测试环境,主要过程如下: + +- **拉取代码** + + ```bash + git clone https://github.com/NVIDIA/DeepLearningExamples + cd DeepLearningExamples/PyTorch/LanguageModeling/BERT + # 本次测试是在如下版本下完成的: + git checkout 8d8c524df634e4dfa0cfbf77a904ce2ede85e2ec + ``` +- **准备数据** (也可写到数据处理脚本中,需提供小数据集,训练时间在5min内) + + NGC PyTorch 提供单独的数据下载和预处理脚本 [data/create_datasets_from_start.sh](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT/data/create_datasets_from_start.sh)。在容器中执行如下命令,可以下载和制作 `wikicorpus_en` 的 hdf5 数据集。 + + ```bash + bash data/create_datasets_from_start.sh wiki_only + ``` + + 由于数据集比较大,且容易受网速的影响,上述命令执行时间较长。因此,为了更方便复现竞品的性能数据,我们提供了已经处理好的 seq_len=128 的 hdf5 格式[样本数据集](https://bert-data.bj.bcebos.com/benchmark_sample%2Fhdf5_lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar.gz),共100个 part hdf5 数据文件,约 3.1G。 + + 数据下载后,会得到一个 `hdf5_lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar.gz`压缩文件: + + ```bash + # 解压数据集 + tar -xzvf benchmark_sample_hdf5_lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5.tar.gz + + # 放到 data/ 目录下 + mv benchmark_sample_hdf5_lower_case_1_seq_len_128_max_pred_20_masked_lm_prob_0.15_random_seed_12345_dupe_factor_5 bert/data/ + ``` + + 修改 [scripts/run_pretraining.sh](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/LanguageModeling/BERT/scripts/run_pretraining.sh#L37)脚本的 `DATASET`变量为上述数据集地址即可。 + + +## 三、测试步骤 +为了更方便地测试不同 batch_size、num_gpus 组合下的性能,我们单独编写了 `run_benchmark.sh` 脚本,该脚本需包含运行模型的命令和分析log产出待入库json文件的脚本,详细内容请见脚本; + +**重要的配置参数:** +- **run_mode**: 单卡sp|多卡mp +- **batch_size**: 用于第一阶段的单卡总 batch_size +- **fp_item**: 用于指定精度训练模式,fp32 或 fp16 +- **max_iter**: 运行的最大iter或epoch,根据模型选择 + +- **gradient_accumulation_steps**: 每次执行 optimizer 
前的梯度累加步数 +- **BERT_CONFIG:** 用于指定 base 或 large 模型的参数配置文件 (line:49) +- **bert_model:** 用于指定模型类型,默认为`bert-large-uncased` + +### 1. 单机(单卡、8卡)测试 +- **单卡启动脚本:** + + 若测试单机单卡 batch_size=32、FP32 的训练性能,执行如下命令: + + ```bash + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500 ${log_path} # 如果fp32\fp16不方便放在一个脚本,可另写 + ``` + +- **8卡启动脚本:** + + 若测试单机8卡 batch_size=64、FP16 的训练性能,执行如下命令: + + ```bash + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh mp 64 fp16 500 + ``` +- **收敛性验证:** + + 若测试单机8卡 batch_size=64、FP16 的收敛性,执行如下命令,收敛指标:(如loss10.+下降到0.1+\acc:0.98,收敛耗时:v100*32G*8卡*1d) + + ```bash + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh mp 64 fp16 50000 + ``` diff --git a/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/analysis_log.py b/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/analysis_log.py new file mode 100644 index 0000000000..ce0fbe9cff --- /dev/null +++ b/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/analysis_log.py @@ -0,0 +1,4 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python + diff --git a/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/preData.sh b/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/preData.sh new file mode 100755 index 0000000000..726006f3e8 --- /dev/null +++ b/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/preData.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +# Download the dataset +# Install model-specific dependencies diff --git a/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/run_benchmark.sh b/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/run_benchmark.sh new file mode 100755 index 0000000000..eb57caaf69 --- /dev/null +++ b/OtherFrame/nlp/PyTorch/scripts/NLP_demo/nlp_modelName/run_benchmark.sh @@ -0,0 +1,60 @@ +#!/usr/bin/env bash +set -xe +# Positional arguments: $1 run_mode (sp|mp), $2 batch_size, $3 fp_item (fp32|fp16), $4 max_iter +function _set_params(){ + run_mode=${1:-"sp"} + batch_size=${2:-"64"} + fp_item=${3:-"fp32"} # fp32|fp16 + max_iter=${4} # max iterations/epochs; modify the model code if needed so that training stops early + run_log_path=${TRAIN_LOG_DIR:-$(pwd)} + + model_name="nlp_modelName" +
mission_name="语义表示" # task name of the model, see scripts/config.ini (required) + direction_id=1 # direction of the task, 0:CV, 1:NLP, 2:Rec (required) + ips_unit="sequences/s" + + device=${CUDA_VISIBLE_DEVICES//,/ } + arr=(${device}) + num_gpu_devices=${#arr[*]} + log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices} + index_log_file=${run_log_path}/${model_name}_${run_mode}_bs${batch_size}_${fp_item}_${num_gpu_devices}_speed +} +function _train(){ + echo "Train on ${num_gpu_devices} GPUs" + echo "current CUDA_VISIBLE_DEVICES=$CUDA_VISIBLE_DEVICES, gpus=$num_gpu_devices, batch_size=$batch_size" + # Remove old checkpoints so they cannot conflict with this run + rm -rf results/checkpoints + # Document here any special optimization flags or parameters that are enabled + + train_cmd="" + case ${run_mode} in + sp) train_cmd="python -u tools/train.py "${train_cmd} ;; + mp) # NOTE(review): paddle.distributed.launch is Paddle's launcher; a real PyTorch model should use its own multi-GPU launcher here + train_cmd="python -m paddle.distributed.launch --log_dir=./mylog --gpus=$CUDA_VISIBLE_DEVICES tools/train.py "${train_cmd} + log_parse_file="mylog/workerlog.0" ;; + *) echo "choose run_mode(sp or mp)"; exit 1; + esac + +
# Run training inside the if-condition: with "set -e" a bare failing command would exit the script before job_fail_flag is set + if ! timeout 15m ${train_cmd} > "${log_file}" 2>&1; then + echo -e "${model_name}, FAIL" + export job_fail_flag=1 + else + echo -e "${model_name}, SUCCESS" + export job_fail_flag=0 + fi + kill -9 $(ps -ef|grep '[p]ython'|awk '{print $2}') 2>/dev/null || true # best-effort cleanup of leftover python processes; must not abort under "set -e" + + if [ "${run_mode}" = "mp" ] && [ -d mylog ]; then + rm -f "${log_file}" + cp mylog/workerlog.0 "${log_file}" + fi +} +function _analysis_log(){ + python analysis_log.py "${log_file}" "${index_log_file}" # parse the training log and produce the json file to be stored +} + +_set_params "$@" +_train +_analysis_log + diff --git a/OtherFrame/ocr/PyTorch/README_demo.md b/OtherFrame/ocr/PyTorch/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/ocr/PyTorch/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` +脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp 
/workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh mp 64 fp16 500; + " +# 启动镜像 +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash -c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/ocr/PyTorch/models/README_demo.md b/OtherFrame/ocr/PyTorch/models/README_demo.md new file mode 100644 index 0000000000..7beff87dc3 --- /dev/null +++ b/OtherFrame/ocr/PyTorch/models/README_demo.md @@ -0,0 +1,3 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, +## models目录下存放模型代码,以子库方式合入 diff --git a/OtherFrame/ocr/PyTorch/scripts/README_demo.md b/OtherFrame/ocr/PyTorch/scripts/README_demo.md new file mode 100644 index 0000000000..463728480a --- /dev/null 
+++ b/OtherFrame/ocr/PyTorch/scripts/README_demo.md @@ -0,0 +1,39 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/seg/PyTorch/README_demo.md b/OtherFrame/seg/PyTorch/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/seg/PyTorch/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 
内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` +脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh sp 64 fp16 500; + " +# 启动镜像 +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash -c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 
模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/seg/PyTorch/models/README_demo.md b/OtherFrame/seg/PyTorch/models/README_demo.md new file mode 100644 index 0000000000..7beff87dc3 --- /dev/null +++ b/OtherFrame/seg/PyTorch/models/README_demo.md @@ -0,0 +1,3 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, +## models目录下存放模型代码,以子库方式合入 diff --git a/OtherFrame/seg/PyTorch/scripts/README_demo.md b/OtherFrame/seg/PyTorch/scripts/README_demo.md new file mode 100644 index 0000000000..463728480a --- /dev/null +++ b/OtherFrame/seg/PyTorch/scripts/README_demo.md @@ -0,0 +1,39 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 
+"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/video/PyTorch/README_demo.md b/OtherFrame/video/PyTorch/README_demo.md new file mode 100644 index 0000000000..467a78553d --- /dev/null +++ b/OtherFrame/video/PyTorch/README_demo.md @@ -0,0 +1,90 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +├── PrepareEnv.sh # 竞品PyTorch运行环境搭建 +├── README.md # 运行文档 +├── models # 提供竞品PyTorch框架的修改后的模型,官方模型请直接在脚本中拉取,统一方向的模型commit应一致,如不一致请单独在模型运行脚本中写明运行的commit +├── run_PyTorch.sh # 全量竞品PyTorch框架模型运行脚本 +└── scripts # 提供各个模型复现性能的脚本 +## 环境介绍 +### 1.物理机环境 +- 单机(单卡、8卡) + - 系统:CentOS release 7.5 (Final) + - GPU:Tesla V100-SXM2-32GB * 8 + - CPU:Intel(R) Xeon(R) Gold 6271C CPU @ 2.60GHz * 80 + - Driver Version: 460.27.04 + - 内存:629 GB + - CUDA、cudnn Version: cuda10.1-cudnn7 、 cuda11.2-cudnn8-gcc82 +- 多机(32卡) TODO +### 2.Docker 镜像,如: + +NGC PyTorch 的代码仓库提供了自动构建 Docker 镜像的 [Dockerfile](https://github.com/NVIDIA/DeepLearningExamples/blob/master/PyTorch/Translation/Transformer/Dockerfile), + +- **镜像版本**: `nvcr.io/nvidia/pytorch:20.06-py3` # 竞品镜像,每个方向的请一致 +- **PyTorch 版本**: `1.6.0a0+9907a3e` # 竞品版本:最新稳定版本,如需特定版本请备注说明原因 +- **CUDA 版本**: `11.2` +- **cuDnn 版本**: `8.0.1` + +## 测试步骤 +```bash +bash run_PyTorch.sh; # 创建容器,在该标准环境中测试模型 +``` +脚本内容,如: +```bash +#!/usr/bin/env bash +# 拉镜像 +ImageName= ; +docker pull ${ImageName} +# 启动镜像后测试单个模型 +run_cmd="bash PrepareEnv.sh; + cd /workspace/models/NLP/nlp_modelName/; + cp /workspace/scripts/NLP/nlp_modelName/preData.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/run_benchmark.sh ./; + cp /workspace/scripts/NLP/nlp_modelName/analysis_log.py ./; + CUDA_VISIBLE_DEVICES=0 bash run_benchmark.sh sp 32 fp32 500; + CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 bash run_benchmark.sh sp 64 fp16 500; + " +# 启动镜像 +nvidia-docker run --name test_torch -it \ + --net=host \ + --shm-size=1g \ + -v $PWD:/workspace \ + ${ImageName} /bin/bash 
-c "${run_cmd}" + +``` +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + + diff --git a/OtherFrame/video/PyTorch/models/README_demo.md b/OtherFrame/video/PyTorch/models/README_demo.md new file mode 100644 index 0000000000..7beff87dc3 --- /dev/null +++ b/OtherFrame/video/PyTorch/models/README_demo.md @@ -0,0 +1,3 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, +## models目录下存放模型代码,以子库方式合入 diff --git a/OtherFrame/video/PyTorch/scripts/README_demo.md b/OtherFrame/video/PyTorch/scripts/README_demo.md new file mode 100644 index 0000000000..463728480a --- /dev/null +++ b/OtherFrame/video/PyTorch/scripts/README_demo.md @@ -0,0 +1,39 @@ +# NGC PyTorch 性能复现 +## 本readme仅为示例,相关内容请勿更新到此, NLP_demo也仅为示例 +## 目录 + +## 单个模型脚本目录 + +└── nlp_modelName # 模型名 + ├── README.md # 运行文档 + ├── analysis_log.py # 
log解析脚本,每个框架尽量统一,可参考[paddle的analysis.py](https://github.com/mmglove/benchmark/blob/jp_0907/scripts/analysis.py) + ├── logs # 训练log,注:log中不得包含机器ip等敏感信息 + │   ├── index # log解析后待入库数据json文件 + │   │   ├── nlp_modelName_sp_bs32_fp32_1_speed # 单卡数据 + │   │   └── nlp_modelName_mp_bs32_fp32_8_speed # 8卡数据 + │   └── train_log # 原始训练log + ├── preData.sh # 数据处理 + └── run_benchmark.sh # 运行脚本(包含性能、收敛性) + +## 输出 + +每个模型case需返回log解析后待入库数据json文件 + +```bash +{ +"log_file": "/logs/2021.0906.211134.post107/train_log/ResNet101_bs32_1_1_sp", \ # log 目录,创建规范见PrepareEnv.sh +"model_name": "clas_MobileNetv1_bs32_fp32", \ # 模型case名,创建规范:repoName_模型名_bs${bs_item}_${fp_item} 如:clas_MobileNetv1_bs32_fp32 +"mission_name": "图像分类", \ # 模型case所属任务名称,具体可参考scripts/config.ini +"direction_id": 0, \ # 模型case所属方向id,0:CV|1:NLP|2:Rec 具体可参考benchmark/scripts/config.ini +"run_mode": "sp", \ # 单卡:sp|多卡:mp +"index": 1, \ # 速度验证默认为1 +"gpu_num": 1, \ # 1|8 +"FINAL_RESULT": 197.514, \ # 速度计算后的平均值,需要skip掉不稳定的前几步值 +"JOB_FAIL_FLAG": 0, \ # 该模型case运行0:成功|1:失败 +"UNIT": "images/s" \ # 速度指标的单位 +} + +``` + + +