From ec81574a211c2b316901300f6e079bff817caec9 Mon Sep 17 00:00:00 2001
From: Gabriela Cervantes
Date: Fri, 11 Sep 2020 11:39:07 -0500
Subject: [PATCH] metrics: Add tensorflow fp32 inference container benchmark

This PR adds the tensorflow fp32 inference benchmark so that we can
launch a container that runs this benchmark.

Fixes #2849

Signed-off-by: Gabriela Cervantes
---
 .../Dockerfile_fp32_inference/Dockerfile |  31 ++++
 .../tensorflow_fp32_inference.sh         | 151 ++++++++++++++++++
 2 files changed, 182 insertions(+)
 create mode 100644 metrics/machine_learning/Dockerfile_fp32_inference/Dockerfile
 create mode 100755 metrics/machine_learning/tensorflow_fp32_inference.sh

diff --git a/metrics/machine_learning/Dockerfile_fp32_inference/Dockerfile b/metrics/machine_learning/Dockerfile_fp32_inference/Dockerfile
new file mode 100644
index 000000000..d07c68f87
--- /dev/null
+++ b/metrics/machine_learning/Dockerfile_fp32_inference/Dockerfile
@@ -0,0 +1,31 @@
+# Copyright (c) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Set up an image with 'tensorflow' installed
+# for the fp32 inference model
+
+# Usage: FROM [image name]
+FROM intelaipg/intel-optimized-tensorflow:1.15.0-mkl-py3
+
+# Version of the Dockerfile
+LABEL DOCKERFILE_VERSION="1.0"
+
+ENV TEST_DIRECTORY "tensorflow"
+ENV MODELS_URL "https://github.com/tensorflow/models.git"
+ENV INTELAI_URL "https://github.com/IntelAI/models.git"
+ENV PRETRAINED_MODEL_URL "https://storage.googleapis.com/intel-optimized-tensorflow/models/v1_5/inceptionv3_fp32_pretrained_model.pb"
+ENV GRAPH_MODEL_URL "https://zenodo.org/record/2535873/files/resnet50_v1.pb"
+ENV INTELAI_VERSION "v1.5.0"
+
+RUN apt-get update && \
+	apt-get install -y git numactl google-perftools wget && \
+	apt-get remove -y unattended-upgrades && \
+	mkdir ${TEST_DIRECTORY} && cd ${TEST_DIRECTORY} && \
+	git clone --depth 1 ${MODELS_URL} /${TEST_DIRECTORY}/models && \
+	git clone -b ${INTELAI_VERSION} --depth 1 ${INTELAI_URL} /${TEST_DIRECTORY}/intel_models && \
+	wget -q -P /${TEST_DIRECTORY} ${PRETRAINED_MODEL_URL} && \
+	wget -q -P /${TEST_DIRECTORY} ${GRAPH_MODEL_URL}
+
+WORKDIR ${TEST_DIRECTORY}
+CMD ["/bin/bash"]
diff --git a/metrics/machine_learning/tensorflow_fp32_inference.sh b/metrics/machine_learning/tensorflow_fp32_inference.sh
new file mode 100755
index 000000000..8efca736f
--- /dev/null
+++ b/metrics/machine_learning/tensorflow_fp32_inference.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+#
+# Copyright (c) 2020 Intel Corporation
+#
+# SPDX-License-Identifier: Apache-2.0
+
+# Description of the test:
+# This test runs the 'tensorflow benchmark'
+# inside a container for the fp32 inference model
+# https://github.com/IntelAI/models/blob/master/benchmarks/image_recognition/tensorflow/resnet50v1_5/
+
+set -e
+
+# General env
+SCRIPT_PATH=$(dirname "$(readlink -f "$0")")
+source "${SCRIPT_PATH}/../lib/common.bash"
+
+TEST_NAME="tensorflow_fp32_inference_container"
+IMAGE="tensorflow_fp32_inference"
+DOCKERFILE="${SCRIPT_PATH}/Dockerfile_fp32_inference/Dockerfile"
+CI_JOB="${CI_JOB:-""}"
+MODEL="resnet50"
+MODEL_VERSION="${MODEL}v1_5"
+PRE_TRAINED_MODEL_FILE="${MODEL}_v1.pb"
+FRAMEWORK="tensorflow"
+BATCH_SIZE="10"
+PRECISION="fp32"
+MODE="inference"
+http_proxy="${http_proxy:-}"
+https_proxy="${https_proxy:-}"
+TEST_DIRECTORY="tensorflow"
+BENCHMARK_DIRECTORY="/${TEST_DIRECTORY}/intel_models/benchmarks"
+RESULTS_DIR=$(mktemp --tmpdir -d results.XXXXXXXXXX)
+TMP_FILE="/tmp/results"
+CMD_GET_FILE="find ${TMP_FILE} -type f -name '*.log'"
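+# CMD runs the IntelAI launch_benchmark.py driver against the pre-trained
+# resnet50 graph; --benchmark-only measures performance only (no accuracy
+# run), and the trailing sleep keeps the container alive long enough for
+# the log checks below to collect the results.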
+CMD="mkdir ${TMP_FILE} && python3 ${BENCHMARK_DIRECTORY}/launch_benchmark.py --in-graph /${TEST_DIRECTORY}/${PRE_TRAINED_MODEL_FILE} \
+--model-name ${MODEL_VERSION} --framework ${FRAMEWORK} --precision ${PRECISION} --mode ${MODE} --batch-size ${BATCH_SIZE} \
+--output-dir=${TMP_FILE} --benchmark-only && sleep 10"
+launch_timeout="10"
+log_timeout="10"
+file_timeout="200"
+
+remove_tmp_dir() {
+	rm -rf "${RESULTS_DIR}"
+}
+
+trap remove_tmp_dir EXIT
+
+save_config(){
+	metrics_json_start_array
+
+	local json="$(cat << EOF
+	{
+		"image": "${IMAGE}",
+		"framework": "${FRAMEWORK}",
+		"batch size": "${BATCH_SIZE}",
+		"precision": "${PRECISION}",
+		"model": "${MODEL}"
+	}
+EOF
+)"
+	metrics_json_add_array_element "$json"
+	metrics_json_end_array "Config"
+}
+
+function main() {
+	cmds=("docker")
+
+	init_env
+	check_cmds "${cmds[@]}"
+	check_dockerfiles_images "${IMAGE}" "${DOCKERFILE}"
+	metrics_json_init
+	save_config
+	docker run -tid --env http_proxy="${http_proxy}" --env https_proxy="${https_proxy}" --runtime="${RUNTIME}" "${IMAGE}" sh -c "${CMD}"
+	CONTAINERS_ID=$(docker ps -a --format "table {{.ID}}" | tail -n +2)
+
+	# Verify that the container is running
+	echo "Verify that the container is running"
+	for i in $(seq "${launch_timeout}"); do
+		containers_launched=$(docker ps -a | grep "${IMAGE}" | grep "Up" | wc -l)
+		if [ "${containers_launched}" -eq 1 ]; then
+			echo "Container is running"
+			break
+		fi
+		sleep 1
+		[ "${i}" == "${launch_timeout}" ] && return 1
+	done
+
+	# Verify that the results log file has been created inside the
+	# tensorflow container. We need a number of retries as the results
+	# log is not created immediately when the tensorflow benchmark is
+	# launched.
+	echo "Check that the results log file is being created"
+	for i in $(seq "${log_timeout}"); do
+		check_file=$(docker exec "${CONTAINERS_ID}" sh -c "ls ${TMP_FILE} | wc -l")
+		if [ "${check_file}" -eq 1 ]; then
+			get_file_name=$(docker exec "${CONTAINERS_ID}" sh -c "${CMD_GET_FILE}")
+			echo "Tensorflow log has been created"
+			break
+		fi
+		sleep 1
+		[ "${i}" == "${log_timeout}" ] && return 1
+	done
+
+	# The results log is created while the benchmark is still running and
+	# is populated as it goes, so poll until the "Throughput" line appears
+	# to make sure we read the complete log.
+	for i in $(seq "${file_timeout}"); do
+		check_results=$(docker exec "${CONTAINERS_ID}" sh -c "cat ${get_file_name}")
+		verify_results_generation=$(echo "${check_results}" | grep "Throughput" | wc -l)
+		if [ "${verify_results_generation}" -eq 1 ]; then
+			echo "Results were generated"
+			break
+		fi
+		sleep 1
+		[ "${i}" == "${file_timeout}" ] && return 1
+	done
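+	# Copy the complete log out of the container and extract the
+	# "Average time" and "Throughput" values it reports.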
-z "${check_file}" ]; then + echo "Tensorflow log has been created" + break + fi + sleep 1 + [ "${i}" == "${log_timeout}" ] && return 1 + done + + # We need to verify that the tensorflow benchmark results are completed as the log is created + # but is being populated while the benchmark is running, this will ensure that we get the + # complete log + for i in $(seq "${file_timeout}") ; do + check_results=$(docker exec "${CONTAINERS_ID}" sh -c "cat ${get_file_name}") + echo "${check_results}" > LOG + verify_results_generation=$(echo "${check_results}" | grep "Throughput" | wc -l) + if [ "${verify_results_generation}" -eq 1 ]; then + echo "Results were generated" + break + fi + sleep 1 + [ "${i}" == "${file_timeout}" ] && return 1 + done + + docker cp "${CONTAINERS_ID}:${get_file_name}" "${RESULTS_DIR}" + pushd "${RESULTS_DIR}" + log_name=$(echo "${get_file_name}" | cut -d '/' -f4) + cat "${log_name}" >> "results" + popd + + metrics_json_start_array + + local output=$(cat "$RESULTS_DIR/results") + local average=$(echo "${output}" | grep "Average time" | cut -d ':' -f2 | sed -e 's/^[ \t]*//') + local throughput=$(echo "${output}" | grep "Throughput" | cut -d ':' -f2| sed -e 's/^[ \t]*//') + local json="$(cat << EOF + { + "Average Time" : "${average}", + "Throughput" : "${throughput}" + } +EOF +)" + metrics_json_add_array_element "$json" + metrics_json_end_array "Results" + metrics_json_save + docker rm -f $(docker ps -qa) + clean_env +} +main "$@"