From 904050222b945ec5bf35de6c608f93fac7fcca79 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Tue, 17 May 2022 23:55:00 -0700 Subject: [PATCH 1/2] Combing workflows --- .github/workflows/pull_request_test.yml | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/.github/workflows/pull_request_test.yml b/.github/workflows/pull_request_test.yml index 6f6da9734..23dbc1de2 100644 --- a/.github/workflows/pull_request_test.yml +++ b/.github/workflows/pull_request_test.yml @@ -1,20 +1,25 @@ name: Feathr Scala Tests And Azure E2E Integration on: + push: + branches: [rijai/citest] + paths-ignore: + - 'docs/**' + - '**/README.md' pull_request: - branches: [main] + branches: [rijai/citest] paths-ignore: - 'docs/**' - '**/README.md' pull_request_target: - types: [labeled] - paths-ignore: - - 'docs/**' - - '**/README.md' + branches: [rijai/citest] + paths-ignore: + - 'docs/**' + - '**/README.md' jobs: sbt_test: runs-on: ubuntu-latest - if: contains(github.event.pull_request.labels.*.name, 'safe to test') + if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test')) steps: - uses: actions/checkout@v2 - name: Set up JDK 8 @@ -26,7 +31,7 @@ jobs: run: sbt clean && sbt test python_lint: runs-on: ubuntu-latest - if: contains(github.event.pull_request.labels.*.name, 'safe to test') + if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test')) steps: - name: Set up Python 3.8 uses: actions/setup-python@v2 @@ -46,7 +51,7 @@ jobs: databricks_test: runs-on: ubuntu-latest - if: contains(github.event.pull_request.labels.*.name, 'safe to test') + if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test')) steps: - uses: actions/checkout@v2 - name: Set up JDK 8 @@ -110,7 +115,7 @@ jobs: azure_synapse_test: # might be a bit duplication to setup both the azure_synapse test and databricks test, but for now we will keep those to accelerate the test speed runs-on: ubuntu-latest - if: contains(github.event.pull_request.labels.*.name, 'safe to test') + if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test')) steps: - uses: actions/checkout@v2 - name: Set up JDK 8 From a7fae70b635970ae2f04547aeb5b7feb9030e097 Mon Sep 17 00:00:00 2001 From: Richin Jain Date: Wed, 18 May 2022 14:54:54 -0700 Subject: [PATCH 2/2] Consolidating CI run workflows --- .github/workflows/main_merge_e2e.yml | 166 ------------------ ...st_test.yml => pull_request_push_test.yml} | 13 +- 2 files changed, 10 insertions(+), 169 deletions(-) delete mode 100644 .github/workflows/main_merge_e2e.yml rename .github/workflows/{pull_request_test.yml => pull_request_push_test.yml} (96%) diff --git a/.github/workflows/main_merge_e2e.yml b/.github/workflows/main_merge_e2e.yml deleted file mode 100644 index 51da63df9..000000000 --- a/.github/workflows/main_merge_e2e.yml +++ /dev/null @@ -1,166 +0,0 @@ -name: Feathr Scala Tests And Azure E2E Integration - -on: - push: - branches: [main] - paths-ignore: - - 'docs/**' - - '**/README.md' - -jobs: - sbt_test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up JDK 8 - uses: actions/setup-java@v2 - with: - java-version: "8" - distribution: "temurin" - - name: Run tests - run: sbt clean && sbt test - python_lint: - runs-on: ubuntu-latest - steps: - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics - - databricks_test: - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up JDK 8 - uses: actions/setup-java@v2 - with: - java-version: "8" - distribution: "temurin" - - name: Build JAR - run: | - sbt assembly - # remote folder for CI upload - echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV - # get local jar name without paths so version change won't affect it - echo "FEATHR_LOCAL_JAR_NAME=$(ls target/scala-2.12/*.jar| xargs -n 1 basename)" >> $GITHUB_ENV - # get local jar name without path - echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls target/scala-2.12/*.jar)" >> $GITHUB_ENV - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Install Feathr Package - run: | - python -m pip install --upgrade pip - python -m pip install pytest pytest-xdist databricks-cli - python -m pip install -e ./feathr_project/ - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Set env variable and upload jars - env: - # set this for databricks CLI - DATABRICKS_HOST: ${{secrets.DATABRICKS_HOST}} - DATABRICKS_TOKEN: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}} - run: | - # overwrite corresponding environment variables to utilize feathr to upload the files - # assuming there will be only one jar in the target folder - databricks fs cp ${{ env.FEATHR_LOCAL_JAR_FULL_NAME_PATH}} dbfs:/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}} --overwrite - - name: Run Feathr with Databricks - env: - PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_databricks" - SPARK_CONFIG__SPARK_CLUSTER: databricks - SPARK_CONFIG__DATABRICKS__WORKSPACE_INSTANCE_URL: ${{secrets.DATABRICKS_HOST}} - DATABRICKS_WORKSPACE_TOKEN_VALUE: ${{secrets.DATABRICKS_WORKSPACE_TOKEN_VALUE}} - SPARK_CONFIG__DATABRICKS__CONFIG_TEMPLATE: '{"run_name":"FEATHR_FILL_IN","new_cluster":{"spark_version":"9.1.x-scala2.12","num_workers":2,"spark_conf":{"FEATHR_FILL_IN":"FEATHR_FILL_IN"},"instance_pool_id":"${{secrets.DATABRICKS_INSTANCE_POOL_ID}}"},"libraries":[{"jar":"FEATHR_FILL_IN"}],"spark_jar_task":{"main_class_name":"FEATHR_FILL_IN","parameters":["FEATHR_FILL_IN"]}}' - REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}} - AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}} - AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}} - AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}} - S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}} - S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}} - ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}} - ADLS_KEY: ${{secrets.ADLS_KEY}} - BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}} - BLOB_KEY: ${{secrets.BLOB_KEY}} - JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}} - KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}} - SPARK_CONFIG__DATABRICKS__FEATHR_RUNTIME_LOCATION: dbfs:/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}} - - run: | - # run only test with databricks. run in 4 parallel jobs - pytest -n 4 - - azure_synapse_test: - # might be a bit duplication to setup both the azure_synapse test and databricks test, but for now we will keep those to accelerate the test speed - runs-on: ubuntu-latest - steps: - - uses: actions/checkout@v2 - - name: Set up JDK 8 - uses: actions/setup-java@v2 - with: - java-version: "8" - distribution: "temurin" - - name: Build JAR - run: | - sbt assembly - # remote folder for CI upload - echo "CI_SPARK_REMOTE_JAR_FOLDER=feathr_jar_github_action_$(date +"%H_%M_%S")" >> $GITHUB_ENV - # get local jar name without paths so version change won't affect it - echo "FEATHR_LOCAL_JAR_NAME=$(ls target/scala-2.12/*.jar| xargs -n 1 basename)" >> $GITHUB_ENV - # get local jar name without path - echo "FEATHR_LOCAL_JAR_FULL_NAME_PATH=$(ls target/scala-2.12/*.jar)" >> $GITHUB_ENV - - name: Set up Python 3.8 - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - name: Azure Blob Storage Upload (Overwrite) - uses: fixpoint/azblob-upload-artifact@v4 - with: - connection-string: ${{secrets.SPARK_JAR_BLOB_CONNECTION_STRING}} - name: ${{ env.CI_SPARK_REMOTE_JAR_FOLDER}} - path: ${{ env.FEATHR_LOCAL_JAR_FULL_NAME_PATH}} - container: ${{secrets.SPARK_JAR_BLOB_CONTAINER}} - cleanup: "true" - - name: Install Feathr Package - run: | - python -m pip install --upgrade pip - python -m pip install pytest pytest-xdist - python -m pip install -e ./feathr_project/ - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - - name: Run Feathr with Azure Synapse - env: - PROJECT_CONFIG__PROJECT_NAME: "feathr_github_ci_synapse" - SPARK_CONFIG__SPARK_CLUSTER: azure_synapse - REDIS_PASSWORD: ${{secrets.REDIS_PASSWORD}} - AZURE_CLIENT_ID: ${{secrets.AZURE_CLIENT_ID}} - AZURE_TENANT_ID: ${{secrets.AZURE_TENANT_ID}} - AZURE_CLIENT_SECRET: ${{secrets.AZURE_CLIENT_SECRET}} - S3_ACCESS_KEY: ${{secrets.S3_ACCESS_KEY}} - S3_SECRET_KEY: ${{secrets.S3_SECRET_KEY}} - ADLS_ACCOUNT: ${{secrets.ADLS_ACCOUNT}} - ADLS_KEY: ${{secrets.ADLS_KEY}} - BLOB_ACCOUNT: ${{secrets.BLOB_ACCOUNT}} - BLOB_KEY: ${{secrets.BLOB_KEY}} - JDBC_TABLE: ${{secrets.JDBC_TABLE}} - JDBC_USER: ${{secrets.JDBC_USER}} - JDBC_PASSWORD: ${{secrets.JDBC_PASSWORD}} - JDBC_DRIVER: ${{secrets.JDBC_DRIVER}} - JDBC_SF_PASSWORD: ${{secrets.JDBC_SF_PASSWORD}} - KAFKA_SASL_JAAS_CONFIG: ${{secrets.KAFKA_SASL_JAAS_CONFIG}} - SPARK_CONFIG__AZURE_SYNAPSE__FEATHR_RUNTIME_LOCATION: "abfss://${{secrets.SPARK_JAR_BLOB_CONTAINER}}@feathrazuretest3storage.dfs.core.windows.net/${{ env.CI_SPARK_REMOTE_JAR_FOLDER}}/${{ env.FEATHR_LOCAL_JAR_NAME}}" - run: | - # skip databricks related test as we just ran the test; also seperate databricks and synapse test to make sure there's no write conflict - # run in 4 parallel jobs to make the time shorter - pytest -n 4 - - diff --git a/.github/workflows/pull_request_test.yml b/.github/workflows/pull_request_push_test.yml similarity index 96% rename from .github/workflows/pull_request_test.yml rename to .github/workflows/pull_request_push_test.yml index 23dbc1de2..546cb8635 100644 --- a/.github/workflows/pull_request_test.yml +++ b/.github/workflows/pull_request_push_test.yml @@ -2,17 +2,18 @@ name: Feathr Scala Tests And Azure E2E Integration on: push: - branches: [rijai/citest] + branches: [main] paths-ignore: - 'docs/**' - '**/README.md' pull_request: - branches: [rijai/citest] + branches: [main] paths-ignore: - 'docs/**' - '**/README.md' pull_request_target: - branches: [rijai/citest] + branches: [main] + types: [labeled, edited, opened, synchronize] paths-ignore: - 'docs/**' - '**/README.md' @@ -22,6 +23,8 @@ jobs: if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test')) steps: - uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.sha }} - name: Set up JDK 8 uses: actions/setup-java@v2 with: @@ -54,6 +57,8 @@ jobs: if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test')) steps: - uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.sha }} - name: Set up JDK 8 uses: actions/setup-java@v2 with: @@ -118,6 +123,8 @@ jobs: if: github.event_name == 'push' || github.event_name == 'pull_request' || (github.event_name == 'pull_request_target' && contains(github.event.pull_request.labels.*.name, 'safe to test')) steps: - uses: actions/checkout@v2 + with: + ref: ${{ github.event.pull_request.head.sha }} - name: Set up JDK 8 uses: actions/setup-java@v2 with: