diff --git a/training/kunlunxin/faster_rcnn-pytorch/README.md b/training/kunlunxin/faster_rcnn-pytorch/README.md new file mode 100644 index 000000000..8035e3b89 --- /dev/null +++ b/training/kunlunxin/faster_rcnn-pytorch/README.md @@ -0,0 +1,46 @@ +### 模型Checkpoint下载 +[模型Checkpoint下载](../../benchmarks/faster_rcnn/README.md#Resnet50预训练权重) +### 测试数据集下载 +[测试数据集下载](../../benchmarks/faster_rcnn/README.md#数据集) + +### 昆仑芯XPU配置与运行信息参考 +#### 环境配置 +- ##### 硬件环境 + - 机器型号: 昆仑芯AI加速器组R480-X8 + - 加速卡型号: 昆仑芯AI加速卡R300 + - 多机网络类型、带宽: InfiniBand,200Gb/s + +- ##### 软件环境 + - OS版本:Ubuntu 20.04 + - OS kernel版本: 5.4.0-26-generic + - 加速卡驱动版本:4.0.25 + - Docker镜像和版本:pytorch1.12.1-cpu-ubuntu18.04:v0.04 + - 训练框架版本:xmlir+e70db8f6 + - 依赖软件版本:pytorch-1.12.1+cpu + + +* 通用指标 + +| 指标名称 | 指标值 | 特殊说明 | +|--------------|-------------------------|---------------------------------------------| +| 任务类别 | 图像目标检测 | | +| 模型 | fasterRCNN | | +| 数据集 | coco2017 | | +| 数据精度 | precision,见“性能指标” | 可选fp32/amp/fp16 | +| 超参修改 | fix_hp,见“性能指标” | 跑满硬件设备评测吞吐量所需特殊超参 | +| 硬件设备简称 | kunlunxin R300 | | +| 硬件存储使用 | mem,见“性能指标” | 通常称为“显存”,单位为GiB | +| 端到端时间 | e2e_time,见“性能指标” | 总时间+Perf初始化等时间 | +| 总吞吐量 | p_whole,见“性能指标” | 实际训练图片数除以总时间(performance_whole) | +| 训练吞吐量 | p_train,见“性能指标” | 不包含每个epoch末尾的评估部分耗时 | +| **计算吞吐量** | **p_core,见“性能指标”** | 不包含数据IO部分的耗时(p_core>p_train>p_whole) | +| 训练结果 | map,见“性能指标” | 单位为平均目标检测正确率 | +| 额外修改项 | 无 | | + +* 性能指标 + +| 配置 | precision | fix_hp | e2e_time | p_whole | p_train | p_core | map | mem | +|----------------|-----------|---------------|----------|---------|---------|--------|-------|-----------| +| R300单机单卡(1x1) | fp32 | bs=16,lr=0.16 | | | | | | 17.0/32.0 | +| R300单机8卡(1x8) | fp32 | bs=16,lr=0.16 | | | | | 36.4% | 28.0/32.0 | +| R300两机8卡(2x8) | fp32 | bs=16,lr=0.16 | | | | | | 15.0/32.0 | diff --git a/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x1x1.py b/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x1x1.py deleted file mode 100644 index 23f715329..000000000 --- 
a/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x1x1.py +++ /dev/null @@ -1,4 +0,0 @@ -vendor: str = "kunlunxin" -train_batch_size = 8 -eval_batch_size = 8 -lr = 0.16 diff --git a/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x1x8.py b/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x1x8.py deleted file mode 100644 index 23f715329..000000000 --- a/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x1x8.py +++ /dev/null @@ -1,4 +0,0 @@ -vendor: str = "kunlunxin" -train_batch_size = 8 -eval_batch_size = 8 -lr = 0.16 diff --git a/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x2x8.py b/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x2x8.py deleted file mode 100644 index 23f715329..000000000 --- a/training/kunlunxin/faster_rcnn-pytorch/config/config_R200x2x8.py +++ /dev/null @@ -1,4 +0,0 @@ -vendor: str = "kunlunxin" -train_batch_size = 8 -eval_batch_size = 8 -lr = 0.16 diff --git a/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x1x1.py b/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x1x1.py new file mode 100644 index 000000000..e9d4cb6ca --- /dev/null +++ b/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x1x1.py @@ -0,0 +1,4 @@ +vendor: str = "kunlunxin" +train_batch_size = 16 +eval_batch_size = 16 +lr = 0.16 diff --git a/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x1x8.py b/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x1x8.py new file mode 100644 index 000000000..e9d4cb6ca --- /dev/null +++ b/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x1x8.py @@ -0,0 +1,4 @@ +vendor: str = "kunlunxin" +train_batch_size = 16 +eval_batch_size = 16 +lr = 0.16 diff --git a/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x2x8.py b/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x2x8.py new file mode 100644 index 000000000..e9d4cb6ca --- /dev/null +++ b/training/kunlunxin/faster_rcnn-pytorch/config/config_R300x2x8.py @@ -0,0 +1,4 @@ +vendor: str = 
"kunlunxin" +train_batch_size = 16 +eval_batch_size = 16 +lr = 0.16 diff --git a/training/nvidia/faster_rcnn-pytorch/README.md b/training/nvidia/faster_rcnn-pytorch/README.md index 27b5d3b73..ac83f7e65 100644 --- a/training/nvidia/faster_rcnn-pytorch/README.md +++ b/training/nvidia/faster_rcnn-pytorch/README.md @@ -55,5 +55,5 @@ torchvision.models.resnet.__dict__['model_urls'][ | ------------------ | --------- | ---- | ---- | ---- | ---- | ---- | ---- | ---- | | A100单机8卡(1x8) | fp32 | / | 14086 | 144 | 150 | 199 | 37.0% | 7.9/40.0 | | A100单机8卡(1x8) | fp32 | bs=16,lr=0.16 | 11848 | 187 | 199 | 298 | 36.6% | 38.8/40.0 | -| A100单机8卡(1x1) | fp32 | bs=16,lr=0.16 | | 26 | 27 | 45 | |33.7/40.0 | +| A100单机单卡(1x1) | fp32 | bs=16,lr=0.16 | | 26 | 27 | 45 | | 33.7/40.0 | | A100两机8卡(2x8) | fp32 | bs=16,lr=0.16 | | 351 | 383 | 601 | | 39.0/40.0 |