Merge branch 'main' into 1.2.X

zillow · Jan 16, 2024 · e05ca4f · e05ca4f
2 parents 01847ed + aeabb24
commit e05ca4f
Show file tree

Hide file tree

Showing 24 changed files with 334 additions and 212 deletions.
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -4,15 +4,19 @@ on: [push, pull_request]
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"]
+    runs-on: ${{ matrix.os }}
 
     steps:
       - uses: actions/checkout@v4
 
-      - name: Set up Python 3.12
-        uses: actions/setup-python@v4
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
         with:
-          python-version: '3.12'
+          python-version: ${{ matrix.python-version }}
           cache: 'pip'
           cache-dependency-path: pyproject.toml
 
@@ -25,13 +29,13 @@ jobs:
 
       - name: Check formatting with black
         run: |
-          black --line-length 100 --diff --color .
-          black --line-length=100 --check .
+          black --line-length 99 --diff --color .
+          black --line-length=99 --check .
 
       - name: Linting with flake8
         run: |
           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics || exit 1
-          flake8 . --count --exit-zero --max-line-length=100 --ignore=E402,W503 --statistics
+          flake8 . --count --exit-zero --max-line-length=99 --ignore=E402,W503 --statistics
 
       - name: Test with pytest
         run: |

diff --git a/.github/workflows/draft-pdf.yml b/.github/workflows/draft-pdf.yml
@@ -19,7 +19,7 @@ jobs:
           paper-path: paper/paper.md
 
       - name: Upload
-        uses: actions/upload-artifact@v3
+        uses: actions/upload-artifact@v4
         with:
           name: paper
           # This is the output path where Pandoc will write the compiled

diff --git a/.github/workflows/github-pages.yml b/.github/workflows/github-pages.yml
@@ -13,7 +13,7 @@ jobs:
       - uses: actions/checkout@v4
 
       - name: Set up Python 3.12
-        uses: actions/setup-python@v4
+        uses: actions/setup-python@v5
         with:
           python-version: '3.12'
 

diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml
@@ -13,73 +13,64 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        os: [ubuntu-latest, windows-latest, macOS-latest]
+        os: [ubuntu-latest, windows-latest, macos-latest]
 
     steps:
       - uses: actions/checkout@v4
 
-      - uses: actions/setup-python@v4
-        with:
-          python-version: '3.12'
-
-      - name: Install cibuildwheel
-        run: python -m pip install cibuildwheel==2.16.2
-
-      - name: Build wheels for Linux
-        if: matrix.os == 'ubuntu-latest'
-        run: |
-          python -m cibuildwheel --output-dir wheelhouse
-        env:
-          CIBW_SKIP: "pp* *i686* *musllinux*"
-          CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8"
-          CIBW_BEFORE_BUILD: pip install --verbose --editable .
-
-      - name: Build wheels for Windows
-        if: matrix.os == 'windows-latest'
-        run: |
-          python -m cibuildwheel --output-dir wheelhouse
-        env:
-          CIBW_SKIP: "pp* *i686* *win32"
-          CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8"
-          CIBW_BEFORE_BUILD: pip install --verbose --editable .
-
-      - name: Build wheels for macOS
-        if: matrix.os == 'macos-latest'
-        run: |
-          python -m cibuildwheel --output-dir wheelhouse
+      - name: Build wheels
+        uses: pypa/cibuildwheel@v2.16.2
         env:
-          CIBW_ARCHS_MACOS: "x86_64 universal2 arm64"
-          CIBW_SKIP: "pp* *i686*"
+          CIBW_BUILD: cp3*-*
+          CIBW_SKIP: pp* *i686* *win32 *musllinux*
           CIBW_PROJECT_REQUIRES_PYTHON: ">=3.8"
           CIBW_BEFORE_BUILD: pip install --verbose --editable .
+          CIBW_ARCHS_LINUX: auto64
+          CIBW_ARCHS_MACOS: x86_64 universal2 arm64
+          CIBW_ARCHS_WINDOWS: auto64
 
-      - uses: actions/upload-artifact@v3
+      - uses: actions/upload-artifact@v4
         with:
+          name: artifact-wheels-${{ matrix.os }}
           path: ./wheelhouse/*.whl
 
+  build_sdist:
+      name: Build source distribution
+      runs-on: ubuntu-latest
+      steps:
+        - uses: actions/checkout@v4
+
+        - uses: actions/setup-python@v5
+          name: Install Python
+          with:
+            python-version: '3.12'
+
+        - run: pip install build
+
+        - name: Build sdist
+          run: python -m build --sdist
+
+        - uses: actions/upload-artifact@v4
+          with:
+            name: artifact-source
+            path: dist/*.tar.gz
+
   upload_pypi:
-    needs: build_wheels
+    needs: [build_wheels, build_sdist]
     runs-on: ubuntu-latest
 
     if: github.event_name == 'push' && startsWith(github.event.ref, 'refs/tags/v')
     steps:
-      - uses: actions/checkout@v4
-
-      - uses: actions/download-artifact@v3
+      - uses: actions/download-artifact@v4
         with:
-          name: artifact
           path: dist
+          pattern: artifact-*
+          merge-multiple: true
 
-      - name: Install dependencies
-        run: |
-          python -m pip install --upgrade pip
-          pip install build
-          pip install twine
-
-      - name: Publish
-        env:
-          TWINE_USERNAME: __token__
-          TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
-        run: |
-          python -m build --sdist
-          twine upload dist/*
+      - uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          user: __token__
+          password: ${{ secrets.PYPI_PASSWORD }}
+          # To test, uncomment the following:
+          # password: ${{ secrets.TEST_PYPI_PASSWORD }}
+          # repository-url: https://test.pypi.org/legacy/
diff --git a/CONTRIBUTING b/CONTRIBUTING
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
@@ -0,0 +1,68 @@
+# Contributions
+
+Contributions are welcome, encouraged, and appreciated!
+
+If you encounter any bugs while using the project, or believe there's a feature that would prove useful, feel free to [submit a new issue](https://github.com/zillow/quantile-forest/issues/new/choose).
+
+All contributions, suggestions, and feedback you submit are accepted under the [project's license](https://github.com/zillow/quantile-forest/blob/main/LICENSE).
+
+## Submitting an Issue
+
+[Issues](https://github.com/zillow/quantile-forest/issues) should be used to report problems with the package or any of its dependencies, request a new feature, or discuss potential changes before a PR is created.
+
+When reporting bugs, please provide a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example), the version of the package, and the environment (e.g., Python version).
+
+If you find an existing Issue that addresses the problem you're having, please add your own reproducible example to the existing issue rather than creating a new one. 
+
+## Submitting a Pull Request
+
+[PRs](https://github.com/zillow/quantile-forest/pulls) are always welcome and can be a quick way to get your fix or improvement merged. In general, PRs should:
+
+- Only fix/add the functionality in question.
+- Address a single concern in the fewest number of changes possible.
+- Include updated documentation.
+
+For changes that address core functionality or would require breaking changes (e.g., a major release), it's generally best to first open an Issue to discuss your proposed changes.
+
+In general, we follow the ["fork-and-pull" Git workflow](https://gist.github.com/Chaser324/ce0505fbed06b947d962):
+
+- [Fork](https://github.com/zillow/quantile-forest/fork) the repository to your own GitHub account
+- Clone the project to your machine
+- Create a branch locally with a succinct but descriptive name
+- Commit changes to the branch
+- Follow any formatting and testing guidelines specific to this repo
+- Push changes to your fork
+- Open a PR in our repository
+
+## Setting Up Your Environment
+
+To contribute to the `quantile-forest` source code, start by forking and then cloning the repository (e.g., `git clone git@github.com:YourUsername/quantile-forest.git`).
+
+Once inside the repository, to build and install the package, run:
+
+```cmd
+python setup.py build_ext --inplace
+python setup.py install
+```
+
+## Testing Your Changes
+
+To execute unit tests from the `quantile-forest` repository, run:
+
+```cmd
+pytest quantile_forest -v
+```
+
+## Troubleshooting
+
+If the build fails because SciPy is not installed, ensure OpenBLAS and LAPACK are available and accessible.
+
+On macOS, run:
+
+```cmd
+brew install openblas
+brew install lapack
+export SYSTEM_VERSION_COMPAT=1
+```
+
+Then try rebuilding.
diff --git a/docs/conf.py b/docs/conf.py
@@ -184,15 +184,13 @@
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    (
-        "index",
-        "quantile-forest",
-        "quantile-forest Documentation",
-        ["Zillow Group"],
-        1,
-    )
-]
+man_pages = [(
+    "index",
+    "quantile-forest",
+    "quantile-forest Documentation",
+    ["Zillow Group"],
+    1,
+)]
 
 # If true, show URL addresses after external links.
 # man_show_urls = False

diff --git a/docs/references.rst b/docs/references.rst
@@ -0,0 +1,11 @@
+:orphan:
+
+.. title:: References
+
+.. _references:
+
+==========
+References
+==========
+
+.. bibliography::
diff --git a/docs/user_guide.rst b/docs/user_guide.rst
@@ -22,15 +22,15 @@ Quantile Regression Forests
 
 A standard decision tree can be extended in a straightforward way to estimate conditional quantiles. When a decision tree is fit, rather than storing only the sufficient statistics of the response variable at the leaf node, such as the mean and variance, all of the response values can be stored with the leaf node. At prediction time, these values can then be used to calculate empirical quantile estimates.
 
-The quantile-based approach can be extended to random forests. To estimate :math:`F(Y=y|x) = q`, each response value in `y_train` is given a weight or frequency. Formally, the weight or frequency given to the :math:`j`\th sample of `y_train`, :math:`y_j`, while estimating the quantile is
+The quantile-based approach can be extended to random forests. To estimate :math:`F(Y=y|x) = q`, each response value in the training set is given a weight or frequency. Formally, the weight or frequency given to the :math:`j`\th training sample, :math:`y_j`, while estimating the quantile is
 
 .. math::
 
   \frac{1}{T} \sum_{t=1}^{T} \frac{\mathbb{1}(y_j \in L_t(x))}{\sum_{i=1}^N \mathbb{1}(y_i \in L_t(x))},
 
 where :math:`L_t(x)` denotes the leaf of tree :math:`t` that :math:`x` falls into.
 
-Informally, this means that given a new unknown sample, we first find the leaf that it falls into at each tree. Then for each `(X, y)` pair in the training data, a weight or frequency is given to `y` for each tree based on the number of times each training sample falls into the same leaf as the new sample. This information can then be used to calculate the empirical quantile estimates.
+Informally, this means that given a new unknown sample, we first find the leaf that it falls into for each tree in the ensemble. Each training sample :math:`y_j` that falls into the same leaf as the new sample is given a weight equal to one divided by the number of training samples in that leaf. Each :math:`y_j` that does not fall into the same leaf as the new sample is given a weight or frequency of zero. The weights or frequencies for each :math:`y_j` are then averaged across all of the trees in the ensemble. This information can then be used to calculate the empirical quantile estimates.
 
 This approach was first proposed by :cite:t:`2006:meinshausen`.
 
@@ -163,7 +163,3 @@ The maximum number of proximity counts output per test sample can be limited by
 Out-of-bag (OOB) proximity counts can be returned by specifying `oob_score = True`::
 
     >>> proximities = reg.proximity_counts(X_train, oob_score=True)
-
-References
-----------
-.. bibliography::
diff --git a/examples/plot_quantile_conformalized.py b/examples/plot_quantile_conformalized.py
@@ -9,6 +9,7 @@
 while QRF may require additional calibration for reliable interval estimates.
 This example uses MAPIE to construct the CQR interval estimates with a QRF.
 """
+
 print(__doc__)
 
 import warnings
@@ -187,7 +188,7 @@ def plot_prediction_intervals(
 
 coords = [axs[0], axs[1]]
 num_plots = rng.choice(len(y_test), int(len(y_test)), replace=False)
-usd_formatter = FuncFormatter(lambda x, p: f"${format(int(x) * 100, ',')}k")
+usd_formatter = FuncFormatter(lambda x, p: f"${format(int(x * 100), ',')}k")
 
 for strategy, coord in zip(strategies.keys(), coords):
     plot_prediction_intervals(

diff --git a/examples/plot_quantile_extrapolation_problem.py b/examples/plot_quantile_extrapolation_problem.py
@@ -9,6 +9,7 @@
 approach.
 
 """
+
 print(__doc__)
 
 import matplotlib.pyplot as plt

diff --git a/examples/plot_quantile_interpolation.py b/examples/plot_quantile_interpolation.py
@@ -7,6 +7,7 @@
 prediction when the desired quantile lies between two data points.
 
 """
+
 print(__doc__)
 
 import matplotlib.pyplot as plt

diff --git a/examples/plot_quantile_regression_intervals.py b/examples/plot_quantile_regression_intervals.py
@@ -7,6 +7,7 @@
 intervals on the California Housing dataset.
 
 """
+
 print(__doc__)
 
 import matplotlib.pyplot as plt
@@ -106,7 +107,7 @@ def plot_intervals(ax, y_true, y_pred_low, y_pred_upp, price_formatter):
 
     fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 4))
 
-    usd_formatter = FuncFormatter(lambda x, p: f"${format(int(x) * 100, ',')}k")
+    usd_formatter = FuncFormatter(lambda x, p: f"${format(int(x * 100), ',')}k")
 
     y_pred_interval = y_pred_upp - y_pred_low
     sort_idx = np.argsort(y_pred)

diff --git a/examples/plot_quantile_toy_example.py b/examples/plot_quantile_toy_example.py
@@ -8,6 +8,7 @@
 the predictions to a ground truth function used to generate noisy samples.
 
 """
+
 print(__doc__)
 
 import matplotlib.pyplot as plt
-Original file line number
+Diff line change
@@ Expand Up / @@ -9,6 +9,7 @@ @@
     approach.
     """
     print(__doc__)
     import matplotlib.pyplot as plt
@@ Expand Down @@