Skip to content

Commit

Permalink
Build Python wheels for Windows (#260)
Browse files Browse the repository at this point in the history
* Build Python wheels for Windows

* Update Python README

* Pin OS version when building wheels
  • Loading branch information
guillaumekln authored Sep 15, 2021
1 parent 43df18f commit 93a41a2
Show file tree
Hide file tree
Showing 7 changed files with 61 additions and 32 deletions.
24 changes: 3 additions & 21 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,33 +43,13 @@ jobs:
test/onmt_tokenizer_test test/data
build-cpp-windows:
name: Build C++ library on windows-latest
runs-on: windows-latest

steps:
- uses: actions/checkout@v2
with:
submodules: recursive

- name: Download ICU
run: |
C:\msys64\usr\bin\wget.exe -nv https://github.com/unicode-org/icu/releases/download/release-68-2/icu4c-68_2-Win64-MSVC2019.zip -O icu.zip
Expand-Archive -Force icu.zip ${{ github.workspace }}\icu
- name: Build and install
run: |
cmake -DCMAKE_INSTALL_PREFIX=${{ github.workspace }}\install -DICU_INCLUDE_DIRS=${{ github.workspace }}\icu\include -DICU_LIBRARIES=${{ github.workspace }}\icu\lib64\icuuc.lib .
cmake --build . --target install
build-and-test-python-wheels:
name: Build and test Python wheels on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
os: [ubuntu-20.04, macos-10.15, windows-2019]

steps:
- uses: actions/checkout@v2
Expand All @@ -82,7 +62,9 @@ jobs:
package-dir: bindings/python
output-dir: wheelhouse
env:
CIBW_ENVIRONMENT_WINDOWS: TOKENIZER_ROOT='${{ github.workspace }}\install'
CIBW_BEFORE_ALL: bindings/python/tools/prepare_build_environment.sh
CIBW_BEFORE_ALL_WINDOWS: bash bindings/python/tools/prepare_build_environment_windows.sh
CIBW_BEFORE_BUILD: pip install pybind11==2.7.1
CIBW_MANYLINUX_X86_64_IMAGE: manylinux2010
CIBW_TEST_COMMAND: pytest {project}/bindings/python/test/test.py
Expand Down
10 changes: 7 additions & 3 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ option(BUILD_TESTS "Compile unit tests" OFF)
option(BUILD_SHARED_LIBS "Build shared libraries" ON)

set(CMAKE_CXX_STANDARD 11)
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
if(CMAKE_VERSION VERSION_LESS "3.7.0")
set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
endif()

if(NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
Expand Down Expand Up @@ -40,13 +42,15 @@ if(MSVC)
endif()
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")
endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4251")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /W4 /wd4251 /d2FH4-")
set(ICU_COMPONENTS uc dt)
else()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wextra")
set(ICU_COMPONENTS uc data)
endif()

if(NOT DEFINED ICU_INCLUDE_DIRS OR NOT DEFINED ICU_LIBRARIES)
find_package(ICU REQUIRED COMPONENTS uc data)
find_package(ICU REQUIRED COMPONENTS ${ICU_COMPONENTS})
endif()

set(INCLUDE_DIRECTORIES
Expand Down
2 changes: 1 addition & 1 deletion bindings/python/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ pip install pyonmttok

**Requirements:**

* OS: Linux, macOS
* OS: Linux, macOS, Windows
* Python version: >= 3.5
* pip version: >= 19.0

Expand Down
18 changes: 18 additions & 0 deletions bindings/python/pyonmttok/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1,19 @@
import sys

# On Windows the compiled extension depends on DLLs that are shipped inside
# this package directory (see the "*.dll" package_data entry in setup.py).
# They must be made discoverable before the extension module is imported.
if sys.platform == "win32":
    import ctypes
    import os

    # Locate the package directory from this file directly instead of going
    # through pkg_resources: pkg_resources is deprecated and would add a
    # runtime dependency on setuptools just to resolve our own path.
    package_dir = os.path.dirname(os.path.abspath(__file__))

    # os.add_dll_directory is not available on every supported Python version,
    # so it is looked up dynamically and only called when present.
    add_dll_directory = getattr(os, "add_dll_directory", None)
    if add_dll_directory is not None:
        add_dll_directory(package_dir)

    # Explicitly load every bundled DLL by absolute path so that the extension
    # can resolve them even when add_dll_directory is unavailable.
    # NOTE(review): the load order follows os.listdir and is intentionally left
    # unchanged; confirm whether dependency order matters on older Pythons.
    for filename in os.listdir(package_dir):
        if filename.endswith(".dll"):
            ctypes.CDLL(os.path.join(package_dir, filename))

from ._ext import *
5 changes: 5 additions & 0 deletions bindings/python/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,13 @@ def _maybe_add_library_root(lib_name, header_only=False):

cflags = ["-std=c++17", "-fvisibility=hidden"]
ldflags = []
package_data = {}
if sys.platform == "darwin":
cflags.append("-mmacosx-version-min=10.14")
ldflags.append("-Wl,-rpath,/usr/local/lib")
elif sys.platform == "win32":
cflags = ["/std:c++17", "/d2FH4-"]
package_data["pyonmttok"] = ["*.dll"]

tokenizer_module = Extension(
"pyonmttok._ext",
Expand Down Expand Up @@ -74,6 +78,7 @@ def _maybe_add_library_root(lib_name, header_only=False):
},
keywords="tokenization opennmt unicode bpe sentencepiece subword",
packages=find_packages(),
package_data=package_data,
python_requires=">=3.5,<3.10",
setup_requires=["pytest-runner"],
tests_require=["pytest"],
Expand Down
14 changes: 7 additions & 7 deletions bindings/python/test/test.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,17 +74,17 @@ def test_invalid_annotation():
def test_file(tmpdir):
input_path = str(tmpdir.join("input.txt"))
output_path = str(tmpdir.join("output.txt"))
with open(input_path, "w") as input_file:
with open(input_path, "w", encoding="utf-8") as input_file:
input_file.write("Hello world!")
tokenizer = pyonmttok.Tokenizer("aggressive", joiner_annotate=True, joiner_new=True)
tokenizer.tokenize_file(input_path, output_path)
assert os.path.exists(output_path)
with open(output_path) as output_file:
with open(output_path, encoding="utf-8") as output_file:
assert output_file.readline().strip() == "Hello world ■ !"
os.remove(input_path)
tokenizer.detokenize_file(output_path, input_path)
assert os.path.exists(input_path)
with open(input_path) as input_file:
with open(input_path, encoding="utf-8") as input_file:
assert input_file.readline().strip() == "Hello world!"

def test_invalid_files(tmpdir):
Expand All @@ -98,7 +98,7 @@ def test_invalid_files(tmpdir):
directory.ensure(dir=True)
directory = str(directory)
input_file = str(tmpdir.join("input.txt"))
with open(input_file, "w") as f:
with open(input_file, "w", encoding="utf-8") as f:
f.write("Hello world!")
with pytest.raises(ValueError):
tokenizer.tokenize_file(input_file, directory)
Expand Down Expand Up @@ -129,7 +129,7 @@ def test_sp_tokenizer():
def test_sp_with_vocabulary(tmpdir):
sp_model_path = os.path.join(_DATA_DIR, "sp-models", "wmtende.model")
vocab_path = str(tmpdir.join("vocab.txt"))
with open(vocab_path, "w") as vocab_file:
with open(vocab_path, "w", encoding="utf-8") as vocab_file:
vocab_file.write("▁Wor\n")

with pytest.raises(ValueError, match="spacer_annotate"):
Expand Down Expand Up @@ -218,7 +218,7 @@ def test_bpe_learner(tmpdir):
learner.ingest("hello world")
model_path = str(tmpdir.join("bpe.model"))
tokenizer = learner.learn(model_path)
with open(model_path) as model:
with open(model_path, encoding="utf-8") as model:
assert model.read() == "#version: 0.2\ne l\nel l\n"
tokens, _ = tokenizer.tokenize("hello")
assert tokens == ["h■", "ell■", "o"]
Expand All @@ -231,7 +231,7 @@ def test_bpe_learner_tokens(tmpdir):
learner.ingest_token(token)
model_path = str(tmpdir.join("bpe.model"))
learner.learn(model_path)
with open(model_path) as model:
with open(model_path, encoding="utf-8") as model:
assert model.read() == "#version: 0.2\na b</w>\nc d</w>\n"

@pytest.mark.parametrize("keep_vocab", [False, True])
Expand Down
20 changes: 20 additions & 0 deletions bindings/python/tools/prepare_build_environment_windows.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
#! /bin/bash

# Prepares the Windows build environment for the Python wheels:
#   1. downloads and extracts the prebuilt ICU binaries (MSVC2019, 64-bit),
#   2. builds and installs the Tokenizer library into $TOKENIZER_ROOT,
#   3. copies the runtime DLLs next to the Python package so they get bundled.
#
# Environment:
#   TOKENIZER_ROOT  (required) install prefix for the Tokenizer library.
#   ICU_VERSION     (optional) ICU release to download, defaults to 69.1.

set -e
set -x

# Fail early with a clear message instead of installing to an empty prefix.
: "${TOKENIZER_ROOT:?TOKENIZER_ROOT must be set to the Tokenizer install prefix}"

ROOT_DIR=$PWD
ICU_VERSION=${ICU_VERSION:-69.1}
# Release tags use '-' and archive names use '_' in place of the version dot.
ICU_ARCHIVE="icu4c-${ICU_VERSION/./_}-Win64-MSVC2019.zip"
PACKAGE_DIR="$ROOT_DIR/bindings/python/pyonmttok"

# -f: fail on HTTP errors rather than saving an error page as the archive.
# -sS: quiet output but still report errors (the wget-style "-nv" is parsed by
#      curl as "-n -v", i.e. --netrc --verbose, which is not what was intended).
curl -fsSL -O "https://github.com/unicode-org/icu/releases/download/release-${ICU_VERSION/./-}/${ICU_ARCHIVE}"
# Extract the archive that was just downloaded, not whatever *.zip is around.
unzip "$ICU_ARCHIVE" -d icu

rm -rf build
mkdir build
cd build
cmake -DLIB_ONLY=ON "-DICU_ROOT=$ROOT_DIR/icu/" "-DCMAKE_INSTALL_PREFIX=$TOKENIZER_ROOT" ..
cmake --build . --config Release --target install

# The DLL file names embed the ICU version, hence the (unquoted) globs.
cp "$ROOT_DIR"/icu/bin64/icudt*.dll "$PACKAGE_DIR/"
cp "$ROOT_DIR"/icu/bin64/icuuc*.dll "$PACKAGE_DIR/"
cp "$TOKENIZER_ROOT/bin/OpenNMTTokenizer.dll" "$PACKAGE_DIR/"

0 comments on commit 93a41a2

Please sign in to comment.