From 90c34812ff2e51c8efd395b9075575b717311ce8 Mon Sep 17 00:00:00 2001 From: Yuki Iwai Date: Sat, 4 Jun 2022 04:34:26 +0900 Subject: [PATCH] Upgrade the Tensorflow version to address some security issues (#1870) * upgrade the tensorflow version to address some security issues * fix enas example codes * upgrade tensorflow to v2.9.1 and tensorflow-aarch64 to v2.9.0 * install protobuf (>= 3.9.2, < 3.20) for tensorflow-aarch64 --- .../tfevent-metricscollector/Dockerfile | 8 ------ .../tfevent-metricscollector/requirements.txt | 6 +++++ cmd/suggestion/chocolate/v1beta1/Dockerfile | 3 --- .../chocolate/v1beta1/requirements.txt | 3 ++- cmd/suggestion/nas/enas/v1beta1/Dockerfile | 7 ------ .../nas/enas/v1beta1/requirements.txt | 8 ++++-- .../enas-cnn-cifar10/Dockerfile.cpu | 2 +- .../enas-cnn-cifar10/Dockerfile.gpu | 2 +- .../trial-images/enas-cnn-cifar10/RunTrial.py | 25 ++++++++++++------- .../tf-mnist-with-summaries/Dockerfile | 2 +- 10 files changed, 33 insertions(+), 33 deletions(-) diff --git a/cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile b/cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile index f94e7be7ca8..b07c4dc17a9 100644 --- a/cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile +++ b/cmd/metricscollector/v1beta1/tfevent-metricscollector/Dockerfile @@ -2,19 +2,11 @@ FROM python:3.9 ENV TARGET_DIR /opt/katib ENV METRICS_COLLECTOR_DIR cmd/metricscollector/v1beta1/tfevent-metricscollector -# tensorflow community build for aarch64 -# https://github.com/tensorflow/build#tensorflow-builds -ENV PIP_EXTRA_INDEX_URL https://snapshots.linaro.org/ldcg/python-cache/ ADD ./pkg/ ${TARGET_DIR}/pkg/ ADD ./${METRICS_COLLECTOR_DIR}/ ${TARGET_DIR}/${METRICS_COLLECTOR_DIR}/ WORKDIR ${TARGET_DIR}/${METRICS_COLLECTOR_DIR} -RUN if [ "$(uname -m)" = "aarch64" ]; then \ - pip install tensorflow-aarch64==2.7.0; \ - else \ - pip install tensorflow==2.7.0; \ - fi; RUN pip install --no-cache-dir -r requirements.txt RUN chgrp -R 0 ${TARGET_DIR} \ diff --git a/cmd/metricscollector/v1beta1/tfevent-metricscollector/requirements.txt b/cmd/metricscollector/v1beta1/tfevent-metricscollector/requirements.txt index ab8a014d528..dbd2a0f1df7 100644 --- a/cmd/metricscollector/v1beta1/tfevent-metricscollector/requirements.txt +++ b/cmd/metricscollector/v1beta1/tfevent-metricscollector/requirements.txt @@ -2,3 +2,9 @@ psutil==5.8.0 rfc3339>=6.2 grpcio==1.41.1 googleapis-common-protos==1.6.0 +# TODO (tenzen-y): We need to delete the line to install protobuf after tensorflow-aarch64 v2.9.1, or higher has been released. +# To avoid the `If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.` error, +# we must restrict the protobuf version. +protobuf >= 3.9.2, < 3.20; platform_machine=="aarch64" +tensorflow==2.9.1; platform_machine=="x86_64" +tensorflow-aarch64==2.9.0; platform_machine=="aarch64" diff --git a/cmd/suggestion/chocolate/v1beta1/Dockerfile b/cmd/suggestion/chocolate/v1beta1/Dockerfile index 2662c015dfb..404a82e3671 100644 --- a/cmd/suggestion/chocolate/v1beta1/Dockerfile +++ b/cmd/suggestion/chocolate/v1beta1/Dockerfile @@ -22,9 +22,6 @@ RUN if [ "$(uname -m)" = "ppc64le" ]; then \ ADD ./pkg/ ${TARGET_DIR}/pkg/ ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/ WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR} -RUN if [ "$(uname -m)" = "aarch64" ]; then \ - sed -i -e '$a git+https://github.com/fmder/ghalton@master' -e '/^ghalton/d' requirements.txt; \ - fi; RUN pip install --no-cache-dir -r requirements.txt RUN chgrp -R 0 ${TARGET_DIR} \ diff --git a/cmd/suggestion/chocolate/v1beta1/requirements.txt b/cmd/suggestion/chocolate/v1beta1/requirements.txt index 8466aa7d7c4..522f268993d 100644 --- a/cmd/suggestion/chocolate/v1beta1/requirements.txt +++ b/cmd/suggestion/chocolate/v1beta1/requirements.txt @@ -8,5 +8,6 @@ protobuf==3.19.1 googleapis-common-protos==1.6.0 SQLAlchemy==1.4.26 git+https://github.com/AIworx-Labs/chocolate@master -ghalton>=0.6.2 +ghalton>=0.6.2; platform_machine=="x86_64" +git+https://github.com/fmder/ghalton@master; platform_machine=="aarch64" cython>=0.29.24 diff --git a/cmd/suggestion/nas/enas/v1beta1/Dockerfile b/cmd/suggestion/nas/enas/v1beta1/Dockerfile index 597868d80f0..e8bc38006ce 100644 --- a/cmd/suggestion/nas/enas/v1beta1/Dockerfile +++ b/cmd/suggestion/nas/enas/v1beta1/Dockerfile @@ -3,9 +3,6 @@ FROM python:3.9 ENV TARGET_DIR /opt/katib ENV SUGGESTION_DIR cmd/suggestion/nas/enas/v1beta1 ENV GRPC_HEALTH_PROBE_VERSION v0.4.11 -# tensorflow community build for aarch64 -# https://github.com/tensorflow/build#tensorflow-builds -ENV PIP_EXTRA_INDEX_URL https://snapshots.linaro.org/ldcg/python-cache/ RUN if [ "$(uname -m)" = "ppc64le" ] || [ "$(uname -m)" = "aarch64" ]; then \ apt-get -y update && \ @@ -26,10 +23,6 @@ RUN if [ "$(uname -m)" = "ppc64le" ]; then \ ADD ./pkg/ ${TARGET_DIR}/pkg/ ADD ./${SUGGESTION_DIR}/ ${TARGET_DIR}/${SUGGESTION_DIR}/ WORKDIR ${TARGET_DIR}/${SUGGESTION_DIR} - -RUN if [ "$(uname -m)" = "aarch64" ]; then \ - sed -i 's/tensorflow==/tensorflow-aarch64==/' requirements.txt; \ - fi; RUN pip install --no-cache-dir -r requirements.txt RUN chgrp -R 0 ${TARGET_DIR} \ diff --git a/cmd/suggestion/nas/enas/v1beta1/requirements.txt b/cmd/suggestion/nas/enas/v1beta1/requirements.txt index a94eec30e45..c184b3eaa5c 100644 --- a/cmd/suggestion/nas/enas/v1beta1/requirements.txt +++ b/cmd/suggestion/nas/enas/v1beta1/requirements.txt @@ -1,5 +1,9 @@ grpcio==1.41.1 -protobuf==3.19.1 googleapis-common-protos==1.6.0 -tensorflow==2.8.0 cython>=0.29.24 +# TODO (tenzen-y): We need to delete the line to install protobuf after tensorflow-aarch64 v2.9.1, or higher has been released. +# To avoid the `If this call came from a _pb2.py file, your generated code is out of date and must be regenerated with protoc >= 3.19.0.` error, +# we must restrict the protobuf version. +protobuf >= 3.9.2, < 3.20; platform_machine=="aarch64" +tensorflow==2.9.1; platform_machine=="x86_64" +tensorflow-aarch64==2.9.0; platform_machine=="aarch64" diff --git a/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu b/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu index 30af4f77020..7ac771602d9 100644 --- a/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu +++ b/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.cpu @@ -1,4 +1,4 @@ -FROM tensorflow/tensorflow:2.7.0 +FROM tensorflow/tensorflow:2.9.1 ENV TARGET_DIR /opt/enas-cnn-cifar10 diff --git a/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu b/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu index 316ddf8a8fe..32e36d50dd4 100644 --- a/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu +++ b/examples/v1beta1/trial-images/enas-cnn-cifar10/Dockerfile.gpu @@ -1,4 +1,4 @@ -FROM tensorflow/tensorflow:2.7.0-gpu +FROM tensorflow/tensorflow:2.9.1-gpu ENV TARGET_DIR /opt/enas-cnn-cifar10 diff --git a/examples/v1beta1/trial-images/enas-cnn-cifar10/RunTrial.py b/examples/v1beta1/trial-images/enas-cnn-cifar10/RunTrial.py index f2868b237ff..6558e37fb83 100644 --- a/examples/v1beta1/trial-images/enas-cnn-cifar10/RunTrial.py +++ b/examples/v1beta1/trial-images/enas-cnn-cifar10/RunTrial.py @@ -16,8 +16,8 @@ from keras.datasets import cifar10 from ModelConstructor import ModelConstructor from tensorflow.keras.utils import to_categorical -from tensorflow.python.keras.utils.multi_gpu_utils import multi_gpu_model from keras.preprocessing.image import ImageDataGenerator +import tensorflow as tf import argparse if __name__ == "__main__": @@ -50,16 +50,23 @@ print("\n>>> Constructing Model...") constructor = ModelConstructor(arch, nn_config) - test_model = constructor.build_model() - print(">>> Model Constructed Successfully\n") - if num_gpus > 1: - test_model = multi_gpu_model(test_model, gpus=num_gpus) + num_physical_gpus = len(tf.config.experimental.list_physical_devices('GPU')) + if 1 <= num_gpus <= num_physical_gpus: + devices = ["/gpu:"+str(i) for i in range(num_physical_gpus)] + else: + num_physical_cpu = len(tf.config.experimental.list_physical_devices('CPU')) + devices = ["/cpu:"+str(j) for j in range(num_physical_cpu)] + + strategy = tf.distribute.MirroredStrategy(devices) + with strategy.scope(): + test_model = constructor.build_model() + test_model.summary() + test_model.compile(loss=keras.losses.categorical_crossentropy, + optimizer=keras.optimizers.Adam(learning_rate=1e-3, decay=1e-4), + metrics=['accuracy']) - test_model.summary() - test_model.compile(loss=keras.losses.categorical_crossentropy, - optimizer=keras.optimizers.Adam(learning_rate=1e-3, decay=1e-4), - metrics=['accuracy']) + print(">>> Model Constructed Successfully\n") (x_train, y_train), (x_test, y_test) = cifar10.load_data() x_train = x_train.astype('float32') diff --git a/examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile b/examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile index e54e4c80698..a481afebee5 100644 --- a/examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile +++ b/examples/v1beta1/trial-images/tf-mnist-with-summaries/Dockerfile @@ -1,4 +1,4 @@ -FROM tensorflow/tensorflow:2.7.0 +FROM tensorflow/tensorflow:2.9.1 ADD examples/v1beta1/trial-images/tf-mnist-with-summaries /opt/tf-mnist-with-summaries WORKDIR /opt/tf-mnist-with-summaries