From ce90ce70bb43b1edc795ab6cccc0d2df71aa29c2 Mon Sep 17 00:00:00 2001
From: Vikas Gupta
Date: Tue, 28 Mar 2023 18:00:42 +0530
Subject: [PATCH 1/3] documentation for issue #450

---
 docs/dataSourcesAndSinks/aws-s3.md | 35 ++++++++++++++++++++++++++++++
 1 file changed, 35 insertions(+)
 create mode 100644 docs/dataSourcesAndSinks/aws-s3.md

diff --git a/docs/dataSourcesAndSinks/aws-s3.md b/docs/dataSourcesAndSinks/aws-s3.md
new file mode 100644
index 000000000..9311dd23d
--- /dev/null
+++ b/docs/dataSourcesAndSinks/aws-s3.md
@@ -0,0 +1,35 @@
+---
+title: AWS S3
+parent: Data Sources and Sinks
+---
+
+# S3
+
+1. set a bucket e.g. zingg28032023 and a folder inside it e.g. zingg
+=> make it publicly accessible
+
+2. create aws access key and export via env vars:
+
+export AWS_ACCESS_KEY_ID=
+export AWS_SECRET_ACCESS_KEY=
+
+(if mfa is enabled AWS_SESSION_TOKEN env var would also be needed )
+
+3. Download hadoop-aws-3.1.0.jar and aws-java-sdk-bundle-1.11.271.jar via maven
+
+4. set above in zingg.conf :
+spark.jars=//hadoop-aws-3.1.0.jar,//aws-java-sdk-bundle-1.11.271.jar
+
+5. Run using:
+
+  ./scripts/zingg.sh --phase findTrainingData --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
+  ./scripts/zingg.sh --phase label --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
+  ./scripts/zingg.sh --phase train --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
+  ./scripts/zingg.sh --phase match --properties-file config/zingg.conf --conf examples/febrl/config.json --zinggDir s3a://zingg28032023/zingg
+
+6. Models etc. would get saved in
+Amazon S3 > Buckets > zingg28032023 >zingg > 100
+
+References:
+
+1. https://spark.apache.org/docs/latest/cloud-integration.html

From 915aff2badea395f09e368ac6ff42bddbecf4b31 Mon Sep 17 00:00:00 2001
From: Vikas Gupta
Date: Tue, 28 Mar 2023 18:09:26 +0530
Subject: [PATCH 2/3] formatting

---
 docs/dataSourcesAndSinks/aws-s3.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/docs/dataSourcesAndSinks/aws-s3.md b/docs/dataSourcesAndSinks/aws-s3.md
index 9311dd23d..bf28c8107 100644
--- a/docs/dataSourcesAndSinks/aws-s3.md
+++ b/docs/dataSourcesAndSinks/aws-s3.md
@@ -5,10 +5,10 @@ parent: Data Sources and Sinks
 
 # S3
 
-1. set a bucket e.g. zingg28032023 and a folder inside it e.g. zingg
+1. Set a bucket e.g. zingg28032023 and a folder inside it e.g. zingg
 => make it publicly accessible
 
-2. create aws access key and export via env vars:
+2. Create aws access key and export via env vars:
 
 export AWS_ACCESS_KEY_ID=
 export AWS_SECRET_ACCESS_KEY=
@@ -17,7 +17,7 @@ export AWS_SECRET_ACCESS_KEY=
 
 3. Download hadoop-aws-3.1.0.jar and aws-java-sdk-bundle-1.11.271.jar via maven
 
-4. set above in zingg.conf :
+4. Set above in zingg.conf :
 spark.jars=//hadoop-aws-3.1.0.jar,//aws-java-sdk-bundle-1.11.271.jar
 
 5. Run using:

From 0b748a2c5a2a3d6fc302db28b4b6f0d23d0b24d9 Mon Sep 17 00:00:00 2001
From: Vikas Gupta
Date: Tue, 28 Mar 2023 23:20:31 +0530
Subject: [PATCH 3/3] removed public access clause

---
 docs/dataSourcesAndSinks/aws-s3.md | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/docs/dataSourcesAndSinks/aws-s3.md b/docs/dataSourcesAndSinks/aws-s3.md
index bf28c8107..54b1d8168 100644
--- a/docs/dataSourcesAndSinks/aws-s3.md
+++ b/docs/dataSourcesAndSinks/aws-s3.md
@@ -6,9 +6,8 @@ parent: Data Sources and Sinks
 # S3
 
 1. Set a bucket e.g. zingg28032023 and a folder inside it e.g. zingg
-=> make it publicly accessible
 
-2. Create aws access key and export via env vars:
+2. Create aws access key and export via env vars (ensure that the user with below keys has read/write access to above):
 
 export AWS_ACCESS_KEY_ID=
 export AWS_SECRET_ACCESS_KEY=
@@ -29,7 +28,3 @@ spark.jars=//hadoop-aws-3.1.0.jar,//aws-java-sdk-bundle-1.11
 
 6. Models etc. would get saved in
 Amazon S3 > Buckets > zingg28032023 >zingg > 100
-
-References:
-
-1. https://spark.apache.org/docs/latest/cloud-integration.html
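Note: instead of exporting credentials as environment variables, the same values can be passed to Spark as properties in zingg.conf. A minimal sketch, assuming the standard hadoop-aws S3A property names (fs.s3a.*); the jar paths and angle-bracket values are placeholders, and this is illustrative rather than part of the patches above:

  spark.jars=/path/to/hadoop-aws-3.1.0.jar,/path/to/aws-java-sdk-bundle-1.11.271.jar
  spark.hadoop.fs.s3a.access.key=<your access key id>
  spark.hadoop.fs.s3a.secret.key=<your secret access key>
  # only needed when MFA / temporary credentials are in use
  spark.hadoop.fs.s3a.session.token=<your session token>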