Skip to content
This repository has been archived by the owner on Jun 6, 2024. It is now read-only.

Commit

Permalink
[auto-test][example][tested] Add username support for auto-test to ke…
Browse files Browse the repository at this point in the history
…ep away path conflict (#1679)

* fix prepare.sh in mpi folder

* add horovod example && add auto-test support

* add horovod example

* fix json of horovod

* add sleep for horovod

* add ssh readiness detection shell script

* add ssh service detection script

* add hdfs support

* revise document of horovod

* revise examples document

* revise document

* fix review

* fix review

* fix review

* add username to auto-test for HDFS path

* revise json file for adding username path

* for test

* fix prepare.sh for adding username parameter

* finish test

* finish test

* review fix

* testing

* testing

* tested
  • Loading branch information
qyyy authored Nov 9, 2018
1 parent d685509 commit 29ba2cc
Show file tree
Hide file tree
Showing 13 changed files with 102 additions and 117 deletions.
4 changes: 2 additions & 2 deletions examples/auto-test/start_all_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,7 +101,7 @@ def scan(self, rootpath):
if os.path.isdir(rootpath + '/' + item):
files = os.listdir(rootpath + '/' + item)
if "prepare.sh" in files: # run the prepare shell script to download the data and code, then upload them to hdfs
os.system("/bin/bash " + rootpath + '/' + item + '/' + "prepare.sh " + self.config.hdfs_url)
os.system("/bin/bash " + rootpath + '/' + item + '/' + "prepare.sh " + self.config.hdfs_url + " " + self.config.PAI_username)
for file in files:
if file.endswith(".json"):
filepath = rootpath + '/' + item + '/' + file
Expand All @@ -121,4 +121,4 @@ def scan(self, rootpath):
args = parser.parse_args()
config = Config(rest_server_url=args.rest_server_url, hdfs_url=args.hdfs_url, webhdfs_url=args.hdfs_url, PAI_username=args.PAI_username, PAI_password=args.PAI_password, jobs=args.jobs)
scanner = Scanner(args.threshold, config)
scanner.scan(args.path)
scanner.scan(args.path)
3 changes: 1 addition & 2 deletions examples/auto-test/test_all_examples.json.igr
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,7 @@
"taskNumber": 1,
"cpuNumber": 2,
"memoryMB": 8192,
"gpuNumber": 0,
"command": "git clone https://github.com/Microsoft/pai.git && mv pai pai_tmp && echo "S" | /bin/bash pai_tmp/examples/auto-test/start.sh normal http://10.20.30.40:9186/api/v1/user/your_username/ 10.20.30.40:9000 http://10.20.30.40:50070 username password && rm -rf pai_tmp"
"command": "git clone https://github.com/Microsoft/pai.git && mv pai pai_tmp && echo \"S\" | /bin/bash pai_tmp/examples/auto-test/start.sh normal http://10.20.30.40:9186/api/v1/user/your_username/ 10.20.30.40:9000 http://10.20.30.40:50070 username password && rm -rf pai_tmp"
}
]
}
Expand Down
6 changes: 3 additions & 3 deletions examples/cntk/cntk.g2p.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,9 @@
"jobName": "cntk-g2p",
"image": "openpai/pai.example.cntk",

"dataDir": "$PAI_DEFAULT_FS_URI/examples/cntk/data",
"outputDir": "$PAI_DEFAULT_FS_URI/examples/cntk/output",
"codeDir": "$PAI_DEFAULT_FS_URI/examples/cntk/code",
"dataDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/cntk/data",
"outputDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/cntk/output",
"codeDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/cntk/code",

"taskRoles": [
{
Expand Down
45 changes: 19 additions & 26 deletions examples/cntk/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ function prepare_data(){

#download data
echo "Downloading cntk data, waiting..."
mkdir cntk_data && cd cntk_data
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.mapping
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test
Expand All @@ -17,22 +18,14 @@ function prepare_data(){
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.txt
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/tiny.ctf

cd ..

#upload data to HDFS
echo "Uploading cntk data, waiting..."
hdfs dfs -put cmudict-0.7b hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.mapping hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.test hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.test.ctf hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.test.txt hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-1-21 hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-1-21.ctf hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-1-21.txt hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-20-21 hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-20-21.ctf hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-20-21.txt hdfs://$1/examples/cntk/data
hdfs dfs -put tiny.ctf hdfs://$1/examples/cntk/data
for i in `ls cntk_data`
do
hdfs dfs -put cntk_data/$i hdfs://$1/$2/examples/cntk/data
done
}

function prepare_code(){
Expand All @@ -46,37 +39,37 @@ function prepare_code(){

#upload code to HDFS
echo "Uploading cntk code, waiting..."
hdfs dfs -put cntk-g2p.sh hdfs://$1/examples/cntk/code
hdfs dfs -put G2P.cntk hdfs://$1/examples/cntk/code
hdfs dfs -put cntk-g2p.sh hdfs://$1/$2/examples/cntk/code
hdfs dfs -put G2P.cntk hdfs://$1/$2/examples/cntk/code
}

if [ $# != 1 ]; then
echo "You must input hdfs socket as the only parameter! Or you cannot run this script correctly!"
if [ $# != 2 ]; then
echo "You must input hdfs socket and username as the only two parameters! Or you cannot run this script correctly!"
exit 1
fi

#make directory on HDFS
echo "Make cntk directory, waiting..."
hdfs dfs -mkdir -p hdfs://$1/examples/cntk/code
hdfs dfs -mkdir -p hdfs://$1/examples/cntk/data
hdfs dfs -mkdir -p hdfs://$1/examples/cntk/output
hdfs dfs -mkdir -p hdfs://$1/$2/examples/cntk/code
hdfs dfs -mkdir -p hdfs://$1/$2/examples/cntk/data
hdfs dfs -mkdir -p hdfs://$1/$2/examples/cntk/output

hdfs dfs -test -e hdfs://$1/examples/cntk/code/*
hdfs dfs -test -e hdfs://$1/$2/examples/cntk/code/*
if [ $? -eq 0 ] ;then
echo "Code exists on HDFS!"
else
prepare_code $1
prepare_code $1 $2
echo "Have prepared code!"
fi

hdfs dfs -test -e hdfs://$1/examples/cntk/data/*
hdfs dfs -test -e hdfs://$1/$2/examples/cntk/data/*
if [ $? -eq 0 ] ;then
echo "Data exists on HDFS!"
else
prepare_data $1
prepare_data $1 $2
echo "Have prepared data"
fi

#delete the files
rm cntk-g2p.sh* G2P.cntk* cmudict* tiny.ctf*
rm -rf cntk-g2p.sh* G2P.cntk* cntk_data/
echo "Prepare for the cntk example done!"
6 changes: 3 additions & 3 deletions examples/horovod/horovod.json
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
{
"jobName": "horovod-mpi-cifar10",
"image": "openpai/example.horovod.mpi",
"dataDir": "$PAI_DEFAULT_FS_URI/examples/tensorflow/distributed-cifar-10/data",
"outputDir": "$PAI_DEFAULT_FS_URI/examples/horovod/output",
"codeDir": "$PAI_DEFAULT_FS_URI/examples/horovod/code",
"dataDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/tensorflow/distributed-cifar-10/data",
"outputDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/horovod/output",
"codeDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/horovod/code",
"taskRoles": [
{
"name": "main",
Expand Down
20 changes: 10 additions & 10 deletions examples/horovod/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ function horovod_prepare_data(){
echo "Uploading cifar-10 data, waiting..."
for i in `ls cifar-10-batches-py`
do
hdfs dfs -put cifar-10-batches-py/$i hdfs://$1/examples/tensorflow/distributed-cifar-10/data
hdfs dfs -put cifar-10-batches-py/$i hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/data
done
}

Expand All @@ -18,28 +18,28 @@ function horovod_prepare_code(){

#upload the code to HDFS
echo "Uploading benchmarks code, waiting..."
hdfs dfs -put benchmarks/ hdfs://$1/examples/horovod/code
hdfs dfs -put start.sh hdfs://$1/examples/horovod/code
hdfs dfs -put benchmarks/ hdfs://$1/$2/examples/horovod/code
hdfs dfs -put start.sh hdfs://$1/$2/examples/horovod/code
}

echo "Make horovod directory, waiting..."
hdfs dfs -mkdir -p hdfs://$1/examples/horovod/output
hdfs dfs -mkdir -p hdfs://$1/examples/horovod/code
hdfs dfs -mkdir -p hdfs://$1/examples/tensorflow/distributed-cifar-10/data
hdfs dfs -mkdir -p hdfs://$1/$2/examples/horovod/output
hdfs dfs -mkdir -p hdfs://$1/$2/examples/horovod/code
hdfs dfs -mkdir -p hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/data

hdfs dfs -test -e hdfs://$1/examples/horovod/code/*
hdfs dfs -test -e hdfs://$1/$2/examples/horovod/code/*
if [ $? -eq 0 ] ;then
echo "Code exists on HDFS!"
else
horovod_prepare_code $1
horovod_prepare_code $1 $2
echo "Have prepared code!"
fi

hdfs dfs -test -e hdfs://$1/examples/tensorflow/distributed-cifar-10/data/*
hdfs dfs -test -e hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/data/*
if [ $? -eq 0 ] ;then
echo "Data exists on HDFS!"
else
horovod_prepare_data $1
horovod_prepare_data $1 $2
echo "Have prepared data"
fi

Expand Down
6 changes: 3 additions & 3 deletions examples/mpi/cntk-mpi.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
"image": "openpai/pai.example.cntk-mpi",

// prepare cmudict corpus in CNTK format https://git.io/vbT5A and upload to hdfs
"dataDir": "$PAI_DEFAULT_FS_URI/examples/cntk/data",
"dataDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/cntk/data",
// make a new dir for output on hdfs
"outputDir": "$PAI_DEFAULT_FS_URI/examples/mpi/cntk/output",
"outputDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/mpi/cntk/output",
// prepare g2p distributed training script cntk-mpi.sh and upload to hdfs
"codeDir": "$PAI_DEFAULT_FS_URI/examples/mpi/cntk/code",
"codeDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/mpi/cntk/code",
"virtualCluster": "default",

"taskRoles": [
Expand Down
61 changes: 27 additions & 34 deletions examples/mpi/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ function mpi_cntk_prepare_data(){

#download data
echo "Downloading mpi cntk data, waiting..."
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b
mkdir mpi_cntk_data && cd mpi_cntk_data
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.mapping
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.test.ctf
Expand All @@ -18,22 +19,14 @@ function mpi_cntk_prepare_data(){
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.ctf
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/cmudict-0.7b.train-dev-20-21.txt
wget https://github.com/Microsoft/CNTK/raw/master/Examples/SequenceToSequence/CMUDict/Data/tiny.ctf
cd ..

#upload data to HDFS
echo "Uploading mpi cntk data, waiting..."
hdfs dfs -put cmudict-0.7b hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.mapping hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.test hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.test.ctf hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.test.txt hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-1-21 hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-1-21.ctf hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-1-21.txt hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-20-21 hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-20-21.ctf hdfs://$1/examples/cntk/data
hdfs dfs -put cmudict-0.7b.train-dev-20-21.txt hdfs://$1/examples/cntk/data
hdfs dfs -put tiny.ctf hdfs://$1/examples/cntk/data
for i in `ls mpi_cntk_data`
do
hdfs dfs -put mpi_cntk_data/$i hdfs://$1/$2/examples/cntk/data
done
}


Expand All @@ -45,38 +38,38 @@ function mpi_cntk_prepare_code(){

#upload code to HDFS
echo "Uploading mpi cntk code, waiting..."
hdfs dfs -put cntk-mpi.sh hdfs://$1/examples/mpi/cntk/code
hdfs dfs -put cntk-mpi.sh hdfs://$1/$2/examples/mpi/cntk/code
}

if [ $# != 1 ]; then
echo "You must input hdfs socket as the only parameter! Or you cannot run this script correctly!"
if [ $# != 2 ]; then
echo "You must input hdfs socket and username as the only two parameters! Or you cannot run this script correctly!"
exit 1
fi

#make directory on HDFS
echo "Make mpi cntk directory, waiting..."
hdfs dfs -mkdir -p hdfs://$1/examples/mpi/cntk/code
hdfs dfs -mkdir -p hdfs://$1/examples/mpi/cntk/output
hdfs dfs -mkdir -p hdfs://$1/examples/cntk/data
hdfs dfs -mkdir -p hdfs://$1/$2/examples/mpi/cntk/code
hdfs dfs -mkdir -p hdfs://$1/$2/examples/mpi/cntk/output
hdfs dfs -mkdir -p hdfs://$1/$2/examples/cntk/data

hdfs dfs -test -e hdfs://$1/examples/mpi/cntk/code/*
hdfs dfs -test -e hdfs://$1/$2/examples/mpi/cntk/code/*
if [ $? -eq 0 ] ;then
echo "Code exists on HDFS!"
else
mpi_cntk_prepare_code $1
mpi_cntk_prepare_code $1 $2
echo "Have prepared code!"
fi

hdfs dfs -test -e hdfs://$1/examples/cntk/data/*
hdfs dfs -test -e hdfs://$1/$2/examples/cntk/data/*
if [ $? -eq 0 ] ;then
echo "Data exists on HDFS!"
else
mpi_cntk_prepare_data $1
mpi_cntk_prepare_data $1 $2
echo "Have prepared data"
fi

#delete the files
rm cntk-mpi.sh* G2P.cntk* cmudict* tiny.ctf*
rm -rf cntk-mpi.sh* G2P.cntk* mpi_cntk_data/
echo "Removed local mpi cntk code and data succeeded!"

#mpi tensorflow cifar-10 prepare
Expand All @@ -88,7 +81,7 @@ function mpi_tensorflow_prepare_data(){
echo "Uploading cifar-10 data, waiting..."
for i in `ls cifar-10-batches-py`
do
hdfs dfs -put cifar-10-batches-py/$i hdfs://$1/examples/tensorflow/distributed-cifar-10/data
hdfs dfs -put cifar-10-batches-py/$i hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/data
done
}

Expand All @@ -98,27 +91,27 @@ function mpi_tensorflow_prepare_code(){

#upload the code to HDFS
echo "Uploading benchmarks code, waiting..."
hdfs dfs -put benchmarks/ hdfs://$1/examples/tensorflow/distributed-cifar-10/code
hdfs dfs -put benchmarks/ hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/code
}

echo "Make mpi tensorflow directory, waiting..."
hdfs dfs -mkdir -p hdfs://$1/examples/mpi/tensorflow/output
hdfs dfs -mkdir -p hdfs://$1/examples/tensorflow/distributed-cifar-10/code
hdfs dfs -mkdir -p hdfs://$1/examples/tensorflow/distributed-cifar-10/data
hdfs dfs -mkdir -p hdfs://$1/$2/examples/mpi/tensorflow/output
hdfs dfs -mkdir -p hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/code
hdfs dfs -mkdir -p hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/data

hdfs dfs -test -e hdfs://$1/examples/tensorflow/distributed-cifar-10/code/*
hdfs dfs -test -e hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/code/*
if [ $? -eq 0 ] ;then
echo "Code exists on HDFS!"
else
mpi_tensorflow_prepare_code $1
mpi_tensorflow_prepare_code $1 $2
echo "Have prepared code!"
fi

hdfs dfs -test -e hdfs://$1/examples/tensorflow/distributed-cifar-10/data/*
hdfs dfs -test -e hdfs://$1/$2/examples/tensorflow/distributed-cifar-10/data/*
if [ $? -eq 0 ] ;then
echo "Data exists on HDFS!"
else
mpi_tensorflow_prepare_data $1
mpi_tensorflow_prepare_data $1 $2
echo "Have prepared data"
fi

Expand Down
6 changes: 3 additions & 3 deletions examples/mpi/tensorflow-mpi.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@
"image": "openpai/pai.example.tensorflow-mpi",

// download cifar10 dataset from http://www.cs.toronto.edu/~kriz/cifar.html and upload to hdfs
"dataDir": "$PAI_DEFAULT_FS_URI/examples/tensorflow/distributed-cifar-10/data",
"dataDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/tensorflow/distributed-cifar-10/data",
// make a new dir for output on hdfs
"outputDir": "$PAI_DEFAULT_FS_URI/examples/mpi/tensorflow/output",
"outputDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/mpi/tensorflow/output",
// download code from tensorflow benchmark https://git.io/vF4wT and upload to hdfs
"codeDir": "$PAI_DEFAULT_FS_URI/examples/tensorflow/distributed-cifar-10/code",
"codeDir": "$PAI_DEFAULT_FS_URI/$PAI_USERNAME/examples/tensorflow/distributed-cifar-10/code",

"taskRoles": [
{
Expand Down
Loading

0 comments on commit 29ba2cc

Please sign in to comment.