#!/bin/bash
# Copyright 2019 Google Inc. All Rights Reserved. Licensed under the Apache
# License, Version 2.0 (the "License"); you may not use this file except in
# compliance with the License. You may obtain a copy of the License at
# http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
# License for the specific language governing permissions and limitations under
# the License.
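#
# Runs the molecules sample end to end on Google Cloud: data extraction,
# Dataflow preprocessing, AI Platform training, and batch prediction.
#
# Example invocation (the bucket path is a placeholder; use your own):
#   ./run-cloud \
#     --work-dir gs://your-bucket/cloudml-samples/molecules \
#     --max-data-files 5 \
#     --project your-project-id \
#     --region us-central1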
set -e
# Parse command line arguments
unset WORK_DIR
MAX_DATA_FILES=5
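# Default to the project configured in gcloud; if that lookup fails, fall back
# to a PROJECT environment variable (if one is set).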
PROJECT=$(gcloud config get-value project || echo $PROJECT)
REGION=us-central1
while [[ $# -gt 0 ]]; do
  case $1 in
    --work-dir)
      WORK_DIR=$2
      shift
      ;;
    --max-data-files)
      MAX_DATA_FILES=$2
      shift
      ;;
    --project)
      PROJECT=$2
      shift
      ;;
    --region)
      REGION=$2
      shift
      ;;
    *)
      echo "error: unrecognized argument $1"
      exit 1
      ;;
  esac
  shift
done
if [[ -z $WORK_DIR ]]; then
  echo "error: argument --work-dir is required"
  exit 1
fi

if [[ $WORK_DIR != gs://* ]]; then
  echo "error: --work-dir must be a Google Cloud Storage path"
  echo "  example: gs://your-bucket/cloudml-samples/molecules"
  exit 1
fi

if [[ -z $PROJECT ]]; then
  echo 'error: --project is required to run in Google Cloud Platform.'
  exit 1
fi
# Wrapper function to print the command being run
function run {
  echo "$ $@"
  "$@"
}
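# e.g. "run gsutil ls" first prints "$ gsutil ls" and then executes it.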
# Extract the data files
echo '>> Extracting data'
run python data-extractor.py \
  --work-dir $WORK_DIR \
  --max-data-files $MAX_DATA_FILES
echo ''
# Preprocess the datasets using Apache Beam's DataflowRunner
echo '>> Preprocessing'
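# --temp_location holds Beam's temporary/staging files; --setup_file lets the
# Dataflow workers install this sample's dependencies.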
run python preprocess.py \
  --project $PROJECT \
  --runner DataflowRunner \
  --temp_location $WORK_DIR/beam-temp \
  --setup_file ./setup.py \
  --work-dir $WORK_DIR
echo ''
# Train and evaluate the model in AI Platform
echo '>> Training'
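# AI Platform job IDs must be unique within a project, so suffix the job name
# with a timestamp.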
JOB="cloudml_samples_molecules_$(date +%Y%m%d_%H%M%S)"
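# The staging bucket is the bucket portion of the work dir, e.g. gs://your-bucket.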
BUCKET=$(echo $WORK_DIR | egrep -o gs://[^/]+)
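# --stream-logs keeps this script attached until the training job finishes,
# so the exported model exists before the prediction steps below run.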
run gcloud ai-platform jobs submit training $JOB \
  --module-name trainer.task \
  --package-path trainer \
  --staging-bucket $BUCKET \
  --runtime-version 1.13 \
  --region $REGION \
  --stream-logs \
  -- \
  --work-dir $WORK_DIR
echo ''
# Get the model path
EXPORT_DIR=$WORK_DIR/model/export/final
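# Pick the most recent export: assuming exports land in timestamped
# subdirectories, the reverse lexicographic sort puts the newest one first.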
MODEL_DIR=$(gsutil ls -d "$EXPORT_DIR/*" | sort -r | head -n 1)
echo "Model: $MODEL_DIR"
echo ''
# Make batch predictions on SDF files
echo '>> Batch prediction'
run python predict.py \
  --work-dir $WORK_DIR \
  --model-dir $MODEL_DIR \
  batch \
  --project $PROJECT \
  --runner DataflowRunner \
  --temp_location $WORK_DIR/beam-temp \
  --setup_file ./setup.py \
  --inputs-dir $WORK_DIR/data \
  --outputs-dir $WORK_DIR/predictions
# Display some predictions
gsutil cat $WORK_DIR/predictions/* | head -n 10