Skip to content

Commit

Permalink
metrics: Add tensorflow fp32 inference container benchmark
Browse files Browse the repository at this point in the history
This PR adds the tensorflow fp32 inference benchmark, now we will be
able to launch a container with this benchmark.

Fixes kata-containers#2849

Signed-off-by: Gabriela Cervantes <[email protected]>
  • Loading branch information
GabyCT committed Sep 11, 2020
1 parent a5a7c59 commit ec81574
Show file tree
Hide file tree
Showing 2 changed files with 182 additions and 0 deletions.
31 changes: 31 additions & 0 deletions metrics/machine_learning/Dockerfile_fp32_inference/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Set up an image with 'tensorflow' installed
# for inference fp32 model

# Usage: FROM [image name]
FROM intelaipg/intel-optimized-tensorflow:1.15.0-mkl-py3

# Version of the Dockerfile
LABEL DOCKERFILE_VERSION="1.0"

# Use the ENV key=value form ('ENV key value' is deprecated).
ENV TEST_DIRECTORY="tensorflow"
ENV MODELS_URL="https://github.com/tensorflow/models.git"
ENV INTELAI_URL="https://github.com/IntelAI/models.git"
ENV PRETRAINED_MODEL_URL="https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inceptionv3_fp32_pretrained_model.pb"
ENV GRAPH_MODEL_URL="https://zenodo.org/record/2535873/files/resnet50_v1.pb"
ENV INTELAI_VERSION="v1.5.0"

# Fetch the benchmark sources and pre-trained models.
# All destinations are absolute, so no 'cd' is needed, and the apt
# package lists are removed in the same layer to keep the image small.
RUN apt-get update && \
	apt-get install -y git numactl google-perftools wget && \
	apt-get remove -y unattended-upgrades && \
	rm -rf /var/lib/apt/lists/* && \
	mkdir -p "/${TEST_DIRECTORY}" && \
	git clone --depth 1 "${MODELS_URL}" "/${TEST_DIRECTORY}/models" && \
	git clone -b "${INTELAI_VERSION}" --depth 1 "${INTELAI_URL}" "/${TEST_DIRECTORY}/intel_models" && \
	wget -q -P "/${TEST_DIRECTORY}" "${PRETRAINED_MODEL_URL}" && \
	wget -q -P "/${TEST_DIRECTORY}" "${GRAPH_MODEL_URL}"

# Absolute path; the original relative form resolved to the same place
# (default WORKDIR is /) but this makes the intent explicit.
WORKDIR /${TEST_DIRECTORY}
CMD ["/bin/bash"]
151 changes: 151 additions & 0 deletions metrics/machine_learning/tensorflow_fp32_inference.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/bin/bash
#
# Copyright (c) 2020 Intel Corporation
#
# SPDX-License-Identifier: Apache-2.0

# Description of the test:
# This test runs the 'tensorflow benchmark'
# inside a container for the fp32 inference model
# https://github.com/IntelAI/models/blob/master/benchmarks/image_recognition/tensorflow/resnet50v1_5/
set -e

# General env
SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
# Provides init_env, check_cmds, check_dockerfiles_images, the
# metrics_json_* helpers and clean_env used below.
source "${SCRIPT_PATH}/../lib/common.bash"

# Identifiers for the metrics report and the container image to build/run.
TEST_NAME="tensorflow_fp32_inference_container"
IMAGE="tensorflow_fp32_inference"
DOCKERFILE="${SCRIPT_PATH}/Dockerfile_fp32_inference/Dockerfile"
CI_JOB="${CI_JOB:-""}"
# Benchmark parameters passed to IntelAI's launch_benchmark.py.
MODEL="resnet50"
MODEL_VERSION="${MODEL}v1_5"
PRE_TRAINED_MODEL_FILE="${MODEL}_v1.pb"
FRAMEWORK="tensorflow"
BATCH_SIZE="10"
PRECISION="fp32"
MODE="inference"
# Default the proxies to empty so 'set -u'-style callers and the
# docker '--env' flags below always get a defined value.
http_proxy="${http_proxy:-}"
https_proxy="${https_proxy:-}"
# Paths inside the container (TEST_DIRECTORY matches the Dockerfile).
TEST_DIRECTORY="tensorflow"
BENCHMARK_DIRECTORY="/${TEST_DIRECTORY}/intel_models/benchmarks"
# Host-side scratch directory for the copied results log.
RESULTS_DIR=$(mktemp --tmpdir -d results.XXXXXXXXXX)
# In-container directory where launch_benchmark.py writes its *.log output.
TMP_FILE="/tmp/results"
CMD_GET_FILE="find ${TMP_FILE} -type f -name '*.log'"
# Full benchmark command executed inside the container; the trailing
# sleep keeps the container alive long enough to collect the log.
CMD="mkdir ${TMP_FILE} && python3 ${BENCHMARK_DIRECTORY}/launch_benchmark.py --in-graph /${TEST_DIRECTORY}/${PRE_TRAINED_MODEL_FILE} \
--model-name ${MODEL_VERSION} --framework ${FRAMEWORK} --precision ${PRECISION} --mode ${MODE} --batch-size ${BATCH_SIZE} \
--output-dir=${TMP_FILE} --benchmark-only && sleep 10"
# Poll limits (seconds) for: container start, log creation, benchmark completion.
launch_timeout="10"
log_timeout="10"
file_timeout="200"

# Delete the temporary results directory created at startup.
remove_tmp_dir() {
	rm -rf -- "${RESULTS_DIR}"
}

# Clean up the scratch directory on every exit path.
trap remove_tmp_dir EXIT

# Record the benchmark configuration (image, framework, batch size,
# precision, model) as the "Config" array of the metrics JSON report.
save_config(){
	metrics_json_start_array

	local config_json
	config_json="$(cat << EOF
{
"image": "${IMAGE}",
"framework": "${FRAMEWORK}",
"batch size": "${BATCH_SIZE}",
"precision": "${PRECISION}",
"model": "${MODEL}"
}
EOF
)"
	metrics_json_add_array_element "$config_json"
	metrics_json_end_array "Config"
}

# Build/verify the benchmark image, launch the container, wait for the
# tensorflow results log to be produced and completed, then extract the
# "Average time" and "Throughput" values into the metrics JSON report.
function main() {
	cmds=("docker")

	init_env
	check_cmds "${cmds[@]}"
	check_dockerfiles_images "${IMAGE}" "${DOCKERFILE}"
	metrics_json_init
	save_config
	# Capture the id of the container we launched directly from
	# 'docker run -d'. The previous 'docker ps -a | tail' picked up
	# every container on the host, breaking on shared machines.
	CONTAINERS_ID=$(docker run -tid --env http_proxy="${http_proxy}" --env https_proxy="${https_proxy}" --runtime="${RUNTIME}" "${IMAGE}" sh -c "${CMD}")

	# Verify that container is running
	echo "Verify that container is running"
	for i in $(seq "${launch_timeout}"); do
		containers_launched=$(docker ps --filter "id=${CONTAINERS_ID}" --filter "status=running" -q | wc -l)
		if [ "${containers_launched}" -eq 1 ]; then
			echo "Container is running"
			break
		fi
		sleep 1
		[ "${i}" == "${log_timeout}" ] && return 1
	done

	# Verify that inside the tensorflow container the results log file has
	# been created. The log is not created immediately when the benchmark
	# launches, so re-run the query each iteration (the original polled a
	# stale value and reused the outer loop variable, defeating the timeout).
	echo "Check that results log file is being created"
	get_file_name=""
	for i in $(seq "${log_timeout}"); do
		# '|| true': find fails while ${TMP_FILE} does not exist yet.
		get_file_name=$(docker exec "${CONTAINERS_ID}" sh -c "${CMD_GET_FILE}" || true)
		if [ -n "${get_file_name}" ]; then
			echo "Tensorflow log has been created"
			break
		fi
		sleep 1
		[ "${i}" == "${log_timeout}" ] && return 1
	done

	# We need to verify that the tensorflow benchmark results are completed as the log is created
	# but is being populated while the benchmark is running, this will ensure that we get the
	# complete log
	for i in $(seq "${file_timeout}"); do
		check_results=$(docker exec "${CONTAINERS_ID}" sh -c "cat ${get_file_name}")
		if echo "${check_results}" | grep -q "Throughput"; then
			echo "Results were generated"
			break
		fi
		sleep 1
		[ "${i}" == "${file_timeout}" ] && return 1
	done

	docker cp "${CONTAINERS_ID}:${get_file_name}" "${RESULTS_DIR}"
	pushd "${RESULTS_DIR}"
	# basename is robust to the log's depth; 'cut -d / -f4' assumed a
	# fixed /tmp/results/<file> layout.
	log_name=$(basename "${get_file_name}")
	cat "${log_name}" >> "results"
	popd

	metrics_json_start_array

	local output=$(cat "$RESULTS_DIR/results")
	local average=$(echo "${output}" | grep "Average time" | cut -d ':' -f2 | sed -e 's/^[ \t]*//')
	local throughput=$(echo "${output}" | grep "Throughput" | cut -d ':' -f2 | sed -e 's/^[ \t]*//')
	local json="$(cat << EOF
{
"Average Time" : "${average}",
"Throughput" : "${throughput}"
}
EOF
)"
	metrics_json_add_array_element "$json"
	metrics_json_end_array "Results"
	metrics_json_save
	# Remove only the container this test launched; the previous
	# 'docker rm -f $(docker ps -qa)' destroyed every container on the host.
	docker rm -f "${CONTAINERS_ID}"
	clean_env
}

main "$@"

0 comments on commit ec81574

Please sign in to comment.