Skip to content

Commit

Permalink
create and update tarball for pets (#372)
Browse files Browse the repository at this point in the history
  • Loading branch information
changhiskhan authored Dec 14, 2022
1 parent c2c7ab9 commit a6877ba
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions python/tools/update_pet.sh
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,25 @@

# Rebuild the Oxford-IIIT Pet dataset in Lance format, pack it into a gzipped
# tarball, and publish both the dataset directory and the tarball to the
# public S3 bucket.
#
# Usage: update_pet.sh DATASET_ROOT OUTPUT_PATH
#   DATASET_ROOT  root dir of the raw dataset
#   OUTPUT_PATH   root dir of the lance/parquet dataset; it and
#                 "<OUTPUT_PATH>.tar.gz" are deleted and recreated
#
# The converter is invoked through a relative path
# (lance/data/convert/oxford_pet.py), so the script must be started from the
# directory that contains lance/.
# NOTE(review): presumably that is the repository's python/ dir -- confirm.

# -e: abort on the first failing command; -u: treat unset variables as errors
# so a missing argument can never reach the rm -rf below.
set -eu

if [ "$#" -lt 2 ]; then
  printf 'Usage: %s DATASET_ROOT OUTPUT_PATH\n' "${0##*/}" >&2
  exit 2
fi

# Base URL passed to the converter as --images-root (no trailing slash).
PUBLIC_URI_ROOT="https://eto-public.s3.us-west-2.amazonaws.com/datasets/oxford_pet"
# Destination of the published dataset; the tarball goes to "<this>.tar.gz".
S3_DATASET_URI="s3://eto-public/datasets/oxford_pet/oxford_pet.lance"

DATASET_ROOT=$1 # this is the root dir of the raw dataset
# Drop one trailing slash so "<OUTPUT_PATH>.tar.gz" is a sibling of the
# dataset directory rather than a dot-file inside it.
OUTPUT_PATH=${2%/} # this is the root dir of the lance/parquet dataset

if [ -z "$OUTPUT_PATH" ]; then
  printf '%s: OUTPUT_PATH must name a dataset directory\n' "${0##*/}" >&2
  exit 2
fi

TARBALL="${OUTPUT_PATH}.tar.gz"

# Start from a clean slate so stale files never end up in the upload.
rm -rf -- "$OUTPUT_PATH" "$TARBALL"

python lance/data/convert/oxford_pet.py \
  "$DATASET_ROOT" --output-path "$OUTPUT_PATH" \
  --fmt lance --images-root "$PUBLIC_URI_ROOT"

# Archive from the parent directory so the tarball holds a single top-level
# entry named after the dataset directory. -z actually gzip-compresses the
# archive, matching its .tar.gz name. The subshell keeps the cd local.
dataset_parent=$(dirname -- "$OUTPUT_PATH")
dataset_name=$(basename -- "$OUTPUT_PATH")
(
  cd -- "$dataset_parent"
  tar -czvf "${dataset_name}.tar.gz" "$dataset_name"
)

# Replace the published dataset: remove the old objects first so files that
# no longer exist locally do not linger in the bucket.
aws s3 rm --recursive "$S3_DATASET_URI"
aws s3 cp --recursive "$OUTPUT_PATH" "$S3_DATASET_URI"

# Replace the published tarball.
aws s3 rm "${S3_DATASET_URI}.tar.gz"
aws s3 cp "$TARBALL" "${S3_DATASET_URI}.tar.gz"

0 comments on commit a6877ba

Please sign in to comment.