forked from kata-containers/tests
-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
metrics: Add tensorflow fp32 inference container benchmark
This PR adds the tensorflow fp32 inference benchmark, now we will be able to launch a container with this benchmark. Fixes kata-containers#2849 Signed-off-by: Gabriela Cervantes <[email protected]>
- Loading branch information
Showing
2 changed files
with
182 additions
and
0 deletions.
There are no files selected for viewing
31 changes: 31 additions & 0 deletions
31
metrics/machine_learning/Dockerfile_fp32_inference/Dockerfile
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Set up an image with 'tensorflow' installed
# for the inference fp32 model

# Usage: FROM [image name]
FROM intelaipg/intel-optimized-tensorflow:1.15.0-mkl-py3

# Version of the Dockerfile
LABEL DOCKERFILE_VERSION="1.0"

# Use key=value form (space-separated ENV is deprecated) and group the
# related variables in a single instruction.
ENV TEST_DIRECTORY="tensorflow" \
    MODELS_URL="https://github.com/tensorflow/models.git" \
    INTELAI_URL="https://github.com/IntelAI/models.git" \
    PRETRAINED_MODEL_URL="https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inceptionv3_fp32_pretrained_model.pb" \
    GRAPH_MODEL_URL="https://zenodo.org/record/2535873/files/resnet50_v1.pb" \
    INTELAI_VERSION="v1.5.0"

# Install the tools needed to fetch the models and clean the apt lists in
# the same layer so the cache does not bloat the image. The clones and
# downloads all use absolute paths under /${TEST_DIRECTORY}, so no
# 'mkdir && cd' is required.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        git \
        google-perftools \
        numactl \
        wget && \
    apt-get remove -y unattended-upgrades && \
    rm -rf /var/lib/apt/lists/* && \
    git clone --depth 1 "${MODELS_URL}" "/${TEST_DIRECTORY}/models" && \
    git clone -b "${INTELAI_VERSION}" --depth 1 "${INTELAI_URL}" "/${TEST_DIRECTORY}/intel_models" && \
    wget -q -P "/${TEST_DIRECTORY}" "${PRETRAINED_MODEL_URL}" && \
    wget -q -P "/${TEST_DIRECTORY}" "${GRAPH_MODEL_URL}"

# Absolute WORKDIR so the image works regardless of the base image's
# default working directory (the files live in /tensorflow).
WORKDIR /${TEST_DIRECTORY}
CMD ["/bin/bash"]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,151 @@ | ||
#!/bin/bash
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Description of the test:
# This test runs the 'tensorflow benchmark'
# inside a container for the fp32 inference model
# https://github.com/IntelAI/models/blob/master/benchmarks/image_recognition/tensorflow/resnet50v1_5/
set -e

# General env
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
source "${SCRIPT_PATH}/../lib/common.bash"

TEST_NAME="tensorflow_fp32_inference_container"
IMAGE="tensorflow_fp32_inference"
DOCKERFILE="${SCRIPT_PATH}/Dockerfile_fp32_inference/Dockerfile"
CI_JOB="${CI_JOB:-""}"

# Benchmark parameters passed to the IntelAI launch script
MODEL="resnet50"
MODEL_VERSION="${MODEL}v1_5"
PRE_TRAINED_MODEL_FILE="${MODEL}_v1.pb"
FRAMEWORK="tensorflow"
BATCH_SIZE="10"
PRECISION="fp32"
MODE="inference"

# Proxy settings forwarded into the benchmark container
http_proxy="${http_proxy:-}"
https_proxy="${https_proxy:-}"

# Paths and commands used inside the benchmark container
TEST_DIRECTORY="tensorflow"
BENCHMARK_DIRECTORY="/${TEST_DIRECTORY}/intel_models/benchmarks"
RESULTS_DIR=$(mktemp --tmpdir -d results.XXXXXXXXXX)
TMP_FILE="/tmp/results"
CMD_GET_FILE="find ${TMP_FILE} -type f -name '*.log'"
CMD="mkdir ${TMP_FILE} && python3 ${BENCHMARK_DIRECTORY}/launch_benchmark.py --in-graph /${TEST_DIRECTORY}/${PRE_TRAINED_MODEL_FILE} \
--model-name ${MODEL_VERSION} --framework ${FRAMEWORK} --precision ${PRECISION} --mode ${MODE} --batch-size ${BATCH_SIZE} \
--output-dir=${TMP_FILE} --benchmark-only && sleep 10"

# Timeouts (in seconds) for the wait loops below
launch_timeout="10"
log_timeout="10"
file_timeout="200"
# Delete the temporary results directory; registered to run on script exit.
remove_tmp_dir() {
	rm -rf "${RESULTS_DIR}"
}

trap remove_tmp_dir EXIT
# Record the benchmark configuration as a "Config" array in the
# metrics JSON report.
save_config(){
	metrics_json_start_array

	local config_entry="$(cat << EOF
{
	"image": "${IMAGE}",
	"framework": "${FRAMEWORK}",
	"batch size": "${BATCH_SIZE}",
	"precision": "${PRECISION}",
	"model": "${MODEL}"
}
EOF
)"
	metrics_json_add_array_element "$config_entry"
	metrics_json_end_array "Config"
}
# Launch the tensorflow fp32 inference container, wait for the benchmark
# results log to be produced, then extract the average time and throughput
# and save them in the metrics JSON report.
function main() {
	cmds=("docker")

	init_env
	check_cmds "${cmds[@]}"
	check_dockerfiles_images "${IMAGE}" "${DOCKERFILE}"
	metrics_json_init
	save_config
	docker run -tid --env http_proxy="${http_proxy}" --env https_proxy="${https_proxy}" --runtime="${RUNTIME}" "${IMAGE}" sh -c "${CMD}"
	CONTAINERS_ID=$(docker ps -a --format "table {{.ID}}" | tail -n +2)

	# Verify that container is running
	echo "Verify that container is running"
	for i in $(seq "${launch_timeout}") ; do
		containers_launched=$(docker ps -a | grep "${IMAGE}" | grep "Up" | wc -l)
		if [ "${containers_launched}" -eq 1 ]; then
			echo "Container is running"
			break
		fi
		sleep 1
		[ "${i}" == "${launch_timeout}" ] && return 1
	done

	# Wait until the results log file has been created inside the
	# tensorflow container. The log is not created immediately when the
	# benchmark is launched, so re-run the check on every iteration
	# (the original code computed it once and reused a stale value, and
	# its inner retry loop clobbered this loop's timeout variable).
	echo "Check that results log file is being created"
	file_existence="ls ${TMP_FILE} | wc -l"
	for i in $(seq "${log_timeout}") ; do
		check_file=$(docker exec "${CONTAINERS_ID}" sh -c "${file_existence}")
		if [ "${check_file}" -eq 1 ]; then
			get_file_name=$(docker exec "${CONTAINERS_ID}" sh -c "${CMD_GET_FILE}")
			echo "Tensorflow log has been created"
			break
		fi
		sleep 1
		[ "${i}" == "${log_timeout}" ] && return 1
	done

	# We need to verify that the tensorflow benchmark results are completed as the log is created
	# but is being populated while the benchmark is running, this will ensure that we get the
	# complete log
	for i in $(seq "${file_timeout}") ; do
		check_results=$(docker exec "${CONTAINERS_ID}" sh -c "cat ${get_file_name}")
		echo "${check_results}" > LOG
		verify_results_generation=$(echo "${check_results}" | grep "Throughput" | wc -l)
		if [ "${verify_results_generation}" -eq 1 ]; then
			echo "Results were generated"
			break
		fi
		sleep 1
		[ "${i}" == "${file_timeout}" ] && return 1
	done

	# Copy the finished log out of the container and collect it under
	# the temporary results directory.
	docker cp "${CONTAINERS_ID}:${get_file_name}" "${RESULTS_DIR}"
	pushd "${RESULTS_DIR}"
	log_name=$(echo "${get_file_name}" | cut -d '/' -f4)
	cat "${log_name}" >> "results"
	popd

	metrics_json_start_array

	# Extract the metrics from the benchmark log and report them.
	local output=$(cat "$RESULTS_DIR/results")
	local average=$(echo "${output}" | grep "Average time" | cut -d ':' -f2 | sed -e 's/^[ \t]*//')
	local throughput=$(echo "${output}" | grep "Throughput" | cut -d ':' -f2 | sed -e 's/^[ \t]*//')
	local json="$(cat << EOF
{
	"Average Time" : "${average}",
	"Throughput" : "${throughput}"
}
EOF
)"
	metrics_json_add_array_element "$json"
	metrics_json_end_array "Results"
	metrics_json_save
	docker rm -f $(docker ps -qa)
	clean_env
}
main "$@"