-
Notifications
You must be signed in to change notification settings - Fork 3.1k
203 lines (174 loc) · 8.07 KB
/
sarplus.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# ---------------------------------------------------------
# Copyright (c) Recommenders contributors.
# Licensed under the MIT License.
# ---------------------------------------------------------
# This workflow will run tests and do packaging for contrib/sarplus.
#
# References:
# * GitHub Actions workflow templates
# + [python package](https://github.com/actions/starter-workflows/blob/main/ci/python-package.yml)
# + [scala](https://github.com/actions/starter-workflows/blob/main/ci/scala.yml)
# * [GitHub hosted runner - Ubuntu 24.04 LTS](https://github.com/actions/runner-images/blob/main/images/ubuntu/Ubuntu2404-Readme.md)
# * [Azure Databricks runtime releases](https://docs.microsoft.com/en-us/azure/databricks/release-notes/runtime/releases)
# * [Azure Synapse Analytics runtimes](https://docs.microsoft.com/en-us/azure/synapse-analytics/spark/apache-spark-version-support)
name: sarplus test and package
on:
push:
paths:
- contrib/sarplus/python/**
- contrib/sarplus/scala/**
- contrib/sarplus/VERSION
- .github/workflows/sarplus.yml
# Enable manual trigger
workflow_dispatch:
env:
SARPLUS_ROOT: ${{ github.workspace }}/contrib/sarplus
PYTHON_ROOT: ${{ github.workspace }}/contrib/sarplus/python
SCALA_ROOT: ${{ github.workspace }}/contrib/sarplus/scala
jobs:
python:
# Test pysarplus with different versions of Python.
# Package pysarplus and upload as GitHub workflow artifact when merged into
# the main branch.
runs-on: ubuntu-24.04
strategy:
matrix:
python-version: ["3.8", "3.9", "3.10", "3.11"]
steps:
- uses: actions/checkout@v4
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install -U build cibuildwheel pip twine
python -m pip install -U flake8 pytest pytest-cov scikit-learn
# Install sbt
# See https://github.com/yokra9/akka-http-example/pull/119/files
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list
curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add
sudo apt-get update
sudo apt-get install sbt
- name: Lint with flake8
run: |
cd "${PYTHON_ROOT}"
# See https://flake8.pycqa.org/en/latest/user/index.html
flake8 .
- name: Package and check
run: |
# build
cd "${PYTHON_ROOT}"
cp "${SARPLUS_ROOT}/VERSION" ./pysarplus/VERSION
python -m build --sdist
PYTHON_VER='${{ matrix.python-version }}'
MINOR_VER="${PYTHON_VER#*.}"
CIBW_BUILD="cp3${MINOR_VER}-manylinux_x86_64" python -m cibuildwheel --platform linux --output-dir dist
python -m twine check dist/*
# set sarplus_version for the artifact upload step
SARPLUS_VERSION=$(cat "${SARPLUS_ROOT}/VERSION")
echo "sarplus_version=${SARPLUS_VERSION}" >> $GITHUB_ENV
- name: Test
run: |
cd "${PYTHON_ROOT}"
python -m pip install dist/*.whl
cd "${SCALA_ROOT}"
export SPARK_VERSION=$(python -m pip show pyspark | grep -i version | cut -d ' ' -f 2)
SPARK_JAR_DIR=$(python -m pip show pyspark | grep -i location | cut -d ' ' -f2)/pyspark/jars
SCALA_JAR=$(ls ${SPARK_JAR_DIR}/scala-library*)
HADOOP_JAR=$(ls ${SPARK_JAR_DIR}/hadoop-client-api*)
SCALA_VERSION=${SCALA_JAR##*-}
export SCALA_VERSION=${SCALA_VERSION%.*}
HADOOP_VERSION=${HADOOP_JAR##*-}
export HADOOP_VERSION=${HADOOP_VERSION%.*}
sbt ++"${SCALA_VERSION}"! package
cd "${PYTHON_ROOT}"
pytest ./tests
- name: Upload Python wheel as GitHub artifact when merged into main
# Upload the whl file of the specific python version
if: github.ref == 'refs/heads/main'
uses: actions/upload-artifact@v4
with:
name: pysarplus-${{ env.sarplus_version }}-cp${{ matrix.python-version }}-wheel
path: ${{ env.PYTHON_ROOT }}/dist/*.whl
- name: Upload Python source as GitHub artifact when merged into main
# Only one pysarplus source tar file is needed
if: github.ref == 'refs/heads/main' && matrix.python-version == '3.10'
uses: actions/upload-artifact@v4
with:
name: pysarplus-${{ env.sarplus_version }}-source
path: ${{ env.PYTHON_ROOT }}/dist/*.tar.gz
scala:
# Test sarplus with different versions of Databricks and Synapse runtime
runs-on: ubuntu-22.04
strategy:
matrix:
include:
# For Azure Databricks 7.3 LTS
- scala-version: "2.12.10"
spark-version: "3.0.1"
hadoop-version: "2.7.4"
# For Azure Databricks 9.1 LTS and Azure Synapse Apache Spark 3.1 runtime
- scala-version: "2.12.10"
spark-version: "3.1.2"
hadoop-version: "2.7.4"
# For Azure Databricks 10.4 LTS
- scala-version: "2.12.14"
spark-version: "3.2.1"
hadoop-version: "3.3.1"
steps:
- uses: actions/checkout@v4
- name: Install sbt
run: |
# See https://github.com/yokra9/akka-http-example/pull/119/files
echo "deb https://repo.scala-sbt.org/scalasbt/debian all main" | sudo tee /etc/apt/sources.list.d/sbt.list
echo "deb https://repo.scala-sbt.org/scalasbt/debian /" | sudo tee /etc/apt/sources.list.d/sbt_old.list
curl -sL "https://keyserver.ubuntu.com/pks/lookup?op=get&search=0x2EE0EA64E40A89B84B2DF73499E82A75642AC823" | sudo apt-key add
sudo apt-get update
sudo apt-get install sbt
- name: Test
run: |
cd "${SCALA_ROOT}"
export SPARK_VERSION="${{ matrix.spark-version }}"
export HADOOP_VERSION="${{ matrix.hadoop-version }}"
sbt ++${{ matrix.scala-version }}! test
- name: Package
# Generate jar files for Spark below 3.2 and above 3.2
if: github.ref == 'refs/heads/main' && matrix.spark-version != '3.0.1'
env:
GPG_KEY: ${{ secrets.SARPLUS_GPG_PRI_KEY_ASC }}
run: |
SARPLUS_VERSION=$(cat "${SARPLUS_ROOT}/VERSION")
# generate artifacts
cd "${SCALA_ROOT}"
export SPARK_VERSION='${{ matrix.spark-version }}'
export HADOOP_VERSION='${{ matrix.hadoop-version }}'
export SCALA_VERSION='${{ matrix.scala-version }}'
sbt ++${SCALA_VERSION}! package
sbt ++${SCALA_VERSION}! packageDoc
sbt ++${SCALA_VERSION}! packageSrc
sbt ++${SCALA_VERSION}! makePom
# sign with GPG
cd "${SCALA_ROOT}/target/scala-2.12"
gpg --import <(cat <<< "${GPG_KEY}")
for file in {*.jar,*.pom}; do gpg -ab "${file}"; done
# bundle
[ '${{ matrix.spark-version }}' == '3.1.2' ] \
&& jar cvf sarplus-${SARPLUS_VERSION}-bundle_2.12-spark-${SPARK_VERSION}.jar \
sarplus_*.jar \
sarplus_*.pom \
sarplus_*.asc
[ '${{ matrix.spark-version }}' == '3.2.1' ] \
&& jar cvf sarplus-spark-3.2-plus-bundle_2.12-${SARPLUS_VERSION}.jar \
sarplus-spark*.jar \
sarplus-spark*.pom \
sarplus-spark*.asc
# set sarplus_version for the artifact upload step
echo "sarplus_version=${SARPLUS_VERSION}" >> $GITHUB_ENV
- name: Upload Scala bundle as GitHub artifact when merged into main
if: github.ref == 'refs/heads/main'
uses: actions/upload-artifact@v4
with:
name: sarplus-${{ env.sarplus_version }}-bundle_2.12-spark-${{ matrix.spark-version }}-jar
path: ${{ env.SCALA_ROOT }}/target/scala-2.12/*bundle*.jar